/* fe_x25519_asm.S */
/*
* Copyright (C) 2006-2026 wolfSSL Inc.
*
* This file is part of wolfSSL.
*
* wolfSSL is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* wolfSSL is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
*/
#ifdef WOLFSSL_USER_SETTINGS
#ifdef WOLFSSL_USER_SETTINGS_ASM
/*
* user_settings_asm.h is generated from user_settings.h by the script
* user_settings_asm.sh, which strips it down to preprocessor directives
* only. That makes the header safe to include in assembly (.S) files.
*/
#include "user_settings_asm.h"
#else
/*
* Note: if user_settings.h contains any C code (e.g. a typedef or function
* prototype), including it here in an assembly (.S) file will cause an
* assembler failure. See user_settings_asm.h above.
*/
#include "user_settings.h"
#endif /* WOLFSSL_USER_SETTINGS_ASM */
#endif /* WOLFSSL_USER_SETTINGS */
#ifndef HAVE_INTEL_AVX1
#define HAVE_INTEL_AVX1
#endif /* HAVE_INTEL_AVX1 */
#ifndef NO_AVX2_SUPPORT
#ifndef HAVE_INTEL_AVX2
#define HAVE_INTEL_AVX2
#endif /* HAVE_INTEL_AVX2 */
#endif /* NO_AVX2_SUPPORT */
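# fe_init performs one-time runtime CPU feature detection and, when the
# required feature bits are present, repoints the function pointers in the
# data section below at the AVX2 implementations. Until it runs, the
# pointers refer to the base _x64 versions.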
#ifndef __APPLE__
.text
.globl fe_init
.type fe_init,@function
.align 16
fe_init:
#else
.section __TEXT,__text
.globl _fe_init
.p2align 4
_fe_init:
#endif /* __APPLE__ */
#ifdef HAVE_INTEL_AVX2
#ifndef __APPLE__
movq cpuFlagsSet@GOTPCREL(%rip), %rax
movl (%rax), %eax
#else
movl _cpuFlagsSet(%rip), %eax
#endif /* __APPLE__ */
testl %eax, %eax
je L_fe_init_get_flags
repz retq
L_fe_init_get_flags:
#ifndef __APPLE__
callq cpuid_get_flags@plt
#else
callq _cpuid_get_flags
#endif /* __APPLE__ */
#ifndef __APPLE__
movq intelFlags@GOTPCREL(%rip), %rdx
movl %eax, (%rdx)
#else
movl %eax, _intelFlags(%rip)
#endif /* __APPLE__ */
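# Install the AVX2 implementations only when both CPU feature bits in
# mask 0x50 are set.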
andl $0x50, %eax
cmpl $0x50, %eax
jne L_fe_init_flags_done
#ifndef __APPLE__
movq fe_cmov_table_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_cmov_table_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_cmov_table_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_cmov_table_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_mul_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_mul_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_mul_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_mul_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_sq_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_sq_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_sq_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_sq_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_mul121666_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_mul121666_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_mul121666_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_mul121666_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_invert_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_invert_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_invert_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_invert_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq curve25519_avx2@GOTPCREL(%rip), %rax
#else
leaq _curve25519_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq curve25519_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _curve25519_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_pow22523_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_pow22523_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_pow22523_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_pow22523_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_p1p1_to_p2_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_p1p1_to_p2_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_p1p1_to_p2_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_p1p1_to_p2_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_p1p1_to_p3_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_p1p1_to_p3_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_p1p1_to_p3_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_p1p1_to_p3_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_p2_dbl_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_p2_dbl_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_p2_dbl_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_p2_dbl_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_madd_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_madd_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_madd_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_madd_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_msub_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_msub_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_msub_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_msub_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_add_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_add_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_add_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_add_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_sub_avx2@GOTPCREL(%rip), %rax
#else
leaq _ge_sub_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq ge_sub_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _ge_sub_p(%rip)
#endif /* __APPLE__ */
#if defined(WOLFSSL_CURVE25519_NOT_USE_ED25519)
#ifndef __APPLE__
movq curve25519_base_avx2@GOTPCREL(%rip), %rax
#else
leaq _curve25519_base_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq curve25519_base_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _curve25519_base_p(%rip)
#endif /* __APPLE__ */
#endif /* WOLFSSL_CURVE25519_NOT_USE_ED25519 */
#ifdef HAVE_ED25519
#ifndef __APPLE__
movq fe_sq2_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_sq2_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_sq2_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_sq2_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_invert_nct_avx2@GOTPCREL(%rip), %rax
#else
leaq _fe_invert_nct_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq fe_invert_nct_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _fe_invert_nct_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq sc_reduce_avx2@GOTPCREL(%rip), %rax
#else
leaq _sc_reduce_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq sc_reduce_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _sc_reduce_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
movq sc_muladd_avx2@GOTPCREL(%rip), %rax
#else
leaq _sc_muladd_avx2(%rip), %rax
#endif /* __APPLE__ */
#ifndef __APPLE__
movq sc_muladd_p@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
#else
movq %rax, _sc_muladd_p(%rip)
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
L_fe_init_flags_done:
#ifndef __APPLE__
movq cpuFlagsSet@GOTPCREL(%rip), %rdx
movl $0x1, (%rdx)
#else
movl $0x1, _cpuFlagsSet(%rip)
#endif /* __APPLE__ */
#endif /* HAVE_INTEL_AVX2 */
repz retq
#ifndef __APPLE__
.size fe_init,.-fe_init
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_frombytes
.type fe_frombytes,@function
.align 16
fe_frombytes:
#else
.section __TEXT,__text
.globl _fe_frombytes
.p2align 4
_fe_frombytes:
#endif /* __APPLE__ */
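# Load a 32-byte little-endian value and clear bit 255; field elements
# are kept as four 64-bit limbs below 2^255.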
movq $0x7fffffffffffffff, %r9
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
andq %r9, %r8
movq %rdx, (%rdi)
movq %rax, 8(%rdi)
movq %rcx, 16(%rdi)
movq %r8, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_frombytes,.-fe_frombytes
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_tobytes
.type fe_tobytes,@function
.align 16
fe_tobytes:
#else
.section __TEXT,__text
.globl _fe_tobytes
.p2align 4
_fe_tobytes:
#endif /* __APPLE__ */
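# Canonical reduction mod p = 2^255 - 19: adding 19 carries out of
# bit 255 exactly when a >= p, so that carry, times 19, is folded back
# into the original value and bit 255 is masked off, yielding the fully
# reduced result.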
movq $0x7fffffffffffffff, %r10
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
addq $19, %rdx
adcq $0x00, %rax
adcq $0x00, %rcx
adcq $0x00, %r8
shrq $63, %r8
imulq $19, %r8, %r9
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
addq %r9, %rdx
adcq $0x00, %rax
adcq $0x00, %rcx
adcq $0x00, %r8
andq %r10, %r8
movq %rdx, (%rdi)
movq %rax, 8(%rdi)
movq %rcx, 16(%rdi)
movq %r8, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_tobytes,.-fe_tobytes
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_1
.type fe_1,@function
.align 16
fe_1:
#else
.section __TEXT,__text
.globl _fe_1
.p2align 4
_fe_1:
#endif /* __APPLE__ */
# Set one
movq $0x01, (%rdi)
movq $0x00, 8(%rdi)
movq $0x00, 16(%rdi)
movq $0x00, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_1,.-fe_1
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_0
.type fe_0,@function
.align 16
fe_0:
#else
.section __TEXT,__text
.globl _fe_0
.p2align 4
_fe_0:
#endif /* __APPLE__ */
# Set zero
movq $0x00, (%rdi)
movq $0x00, 8(%rdi)
movq $0x00, 16(%rdi)
movq $0x00, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_0,.-fe_0
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_copy
.type fe_copy,@function
.align 16
fe_copy:
#else
.section __TEXT,__text
.globl _fe_copy
.p2align 4
_fe_copy:
#endif /* __APPLE__ */
# Copy
movq (%rsi), %rdx
movq 8(%rsi), %rax
movq 16(%rsi), %rcx
movq 24(%rsi), %r8
movq %rdx, (%rdi)
movq %rax, 8(%rdi)
movq %rcx, 16(%rdi)
movq %r8, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_copy,.-fe_copy
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_sub
.type fe_sub,@function
.align 16
fe_sub:
#else
.section __TEXT,__text
.globl _fe_sub
.p2align 4
_fe_sub:
#endif /* __APPLE__ */
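# out = a - b mod p, branch-free: the borrow out of the 256-bit subtract
# (weight 2^256 ≡ 38 mod p) and bit 255 of the result (weight
# 2^255 ≡ 19 mod p) are combined with shld and folded back with a
# single imul by -19.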
# Sub
movq (%rsi), %rax
movq 8(%rsi), %rcx
movq 16(%rsi), %r8
movq 24(%rsi), %r9
subq (%rdx), %rax
sbbq 8(%rdx), %rcx
sbbq 16(%rdx), %r8
sbbq 24(%rdx), %r9
sbbq %r10, %r10
shldq $0x01, %r9, %r10
imulq $-19, %r10
btr $63, %r9
# Add modulus (if underflow)
subq %r10, %rax
sbbq $0x00, %rcx
sbbq $0x00, %r8
sbbq $0x00, %r9
movq %rax, (%rdi)
movq %rcx, 8(%rdi)
movq %r8, 16(%rdi)
movq %r9, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_sub,.-fe_sub
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_add
.type fe_add,@function
.align 16
fe_add:
#else
.section __TEXT,__text
.globl _fe_add
.p2align 4
_fe_add:
#endif /* __APPLE__ */
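# out = a + b mod p, branch-free: the carry out of the 256-bit add
# (weight 2^256 ≡ 38 mod p) and bit 255 of the sum (weight
# 2^255 ≡ 19 mod p) are combined with shld and folded back with a
# single imul by 19.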
# Add
movq (%rsi), %rax
movq 8(%rsi), %rcx
addq (%rdx), %rax
movq 16(%rsi), %r8
adcq 8(%rdx), %rcx
movq 24(%rsi), %r9
adcq 16(%rdx), %r8
adcq 24(%rdx), %r9
movq $0x00, %r10
adcq $0x00, %r10
shldq $0x01, %r9, %r10
imulq $19, %r10
btr $63, %r9
# Sub modulus (if overflow)
addq %r10, %rax
adcq $0x00, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
movq %rax, (%rdi)
movq %rcx, 8(%rdi)
movq %r8, 16(%rdi)
movq %r9, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_add,.-fe_add
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_neg
.type fe_neg,@function
.align 16
fe_neg:
#else
.section __TEXT,__text
.globl _fe_neg
.p2align 4
_fe_neg:
#endif /* __APPLE__ */
movq $-19, %rdx
movq $-1, %rax
movq $-1, %rcx
movq $0x7fffffffffffffff, %r8
subq (%rsi), %rdx
sbbq 8(%rsi), %rax
sbbq 16(%rsi), %rcx
sbbq 24(%rsi), %r8
movq %rdx, (%rdi)
movq %rax, 8(%rdi)
movq %rcx, 16(%rdi)
movq %r8, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_neg,.-fe_neg
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_cmov
.type fe_cmov,@function
.align 16
fe_cmov:
#else
.section __TEXT,__text
.globl _fe_cmov
.p2align 4
_fe_cmov:
#endif /* __APPLE__ */
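# Branch-free conditional move: when the flag in edx is exactly 1,
# cmove replaces each limb of the destination with the corresponding
# limb of the source.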
cmpl $0x01, %edx
movq (%rdi), %rcx
movq 8(%rdi), %r8
movq 16(%rdi), %r9
movq 24(%rdi), %r10
cmoveq (%rsi), %rcx
cmoveq 8(%rsi), %r8
cmoveq 16(%rsi), %r9
cmoveq 24(%rsi), %r10
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
repz retq
#ifndef __APPLE__
.size fe_cmov,.-fe_cmov
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_isnonzero
.type fe_isnonzero,@function
.align 16
fe_isnonzero:
#else
.section __TEXT,__text
.globl _fe_isnonzero
.p2align 4
_fe_isnonzero:
#endif /* __APPLE__ */
movq $0x7fffffffffffffff, %r10
movq (%rdi), %rax
movq 8(%rdi), %rdx
movq 16(%rdi), %rcx
movq 24(%rdi), %r8
addq $19, %rax
adcq $0x00, %rdx
adcq $0x00, %rcx
adcq $0x00, %r8
shrq $63, %r8
imulq $19, %r8, %r9
movq (%rdi), %rax
movq 8(%rdi), %rdx
movq 16(%rdi), %rcx
movq 24(%rdi), %r8
addq %r9, %rax
adcq $0x00, %rdx
adcq $0x00, %rcx
adcq $0x00, %r8
andq %r10, %r8
orq %rdx, %rax
orq %rcx, %rax
orq %r8, %rax
repz retq
#ifndef __APPLE__
.size fe_isnonzero,.-fe_isnonzero
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_isnegative
.type fe_isnegative,@function
.align 16
fe_isnegative:
#else
.section __TEXT,__text
.globl _fe_isnegative
.p2align 4
_fe_isnegative:
#endif /* __APPLE__ */
movq $0x7fffffffffffffff, %r11
movq (%rdi), %rdx
movq 8(%rdi), %rcx
movq 16(%rdi), %r8
movq 24(%rdi), %r9
movq %rdx, %rax
addq $19, %rdx
adcq $0x00, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
shrq $63, %r9
imulq $19, %r9, %r10
addq %r10, %rax
andq $0x01, %rax
repz retq
#ifndef __APPLE__
.size fe_isnegative,.-fe_isnegative
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_cmov_table
.type fe_cmov_table,@function
.align 16
fe_cmov_table:
#else
.section __TEXT,__text
.globl _fe_cmov_table
.p2align 4
_fe_cmov_table:
#endif /* __APPLE__ */
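# This and the entry points below tail-jump through function pointers
# that fe_init may repoint at the AVX2 implementations; the defaults,
# set in the data section, are the _x64 versions.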
#ifndef __APPLE__
jmpq *fe_cmov_table_p(%rip)
#else
jmpq *_fe_cmov_table_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_cmov_table,.-fe_cmov_table
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_mul
.type fe_mul,@function
.align 16
fe_mul:
#else
.section __TEXT,__text
.globl _fe_mul
.p2align 4
_fe_mul:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_mul_p(%rip)
#else
jmpq *_fe_mul_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_mul,.-fe_mul
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_sq
.type fe_sq,@function
.align 16
fe_sq:
#else
.section __TEXT,__text
.globl _fe_sq
.p2align 4
_fe_sq:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_sq_p(%rip)
#else
jmpq *_fe_sq_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_sq,.-fe_sq
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_mul121666
.type fe_mul121666,@function
.align 16
fe_mul121666:
#else
.section __TEXT,__text
.globl _fe_mul121666
.p2align 4
_fe_mul121666:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_mul121666_p(%rip)
#else
jmpq *_fe_mul121666_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_mul121666,.-fe_mul121666
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_invert
.type fe_invert,@function
.align 16
fe_invert:
#else
.section __TEXT,__text
.globl _fe_invert
.p2align 4
_fe_invert:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_invert_p(%rip)
#else
jmpq *_fe_invert_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_invert,.-fe_invert
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl curve25519
.type curve25519,@function
.align 16
curve25519:
#else
.section __TEXT,__text
.globl _curve25519
.p2align 4
_curve25519:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *curve25519_p(%rip)
#else
jmpq *_curve25519_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size curve25519,.-curve25519
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_pow22523
.type fe_pow22523,@function
.align 16
fe_pow22523:
#else
.section __TEXT,__text
.globl _fe_pow22523
.p2align 4
_fe_pow22523:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_pow22523_p(%rip)
#else
jmpq *_fe_pow22523_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_pow22523,.-fe_pow22523
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p2
.type ge_p1p1_to_p2,@function
.align 16
ge_p1p1_to_p2:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p2
.p2align 4
_ge_p1p1_to_p2:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_p1p1_to_p2_p(%rip)
#else
jmpq *_ge_p1p1_to_p2_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_p1p1_to_p2,.-ge_p1p1_to_p2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p3
.type ge_p1p1_to_p3,@function
.align 16
ge_p1p1_to_p3:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p3
.p2align 4
_ge_p1p1_to_p3:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_p1p1_to_p3_p(%rip)
#else
jmpq *_ge_p1p1_to_p3_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_p1p1_to_p3,.-ge_p1p1_to_p3
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p2_dbl
.type ge_p2_dbl,@function
.align 16
ge_p2_dbl:
#else
.section __TEXT,__text
.globl _ge_p2_dbl
.p2align 4
_ge_p2_dbl:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_p2_dbl_p(%rip)
#else
jmpq *_ge_p2_dbl_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_p2_dbl,.-ge_p2_dbl
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_madd
.type ge_madd,@function
.align 16
ge_madd:
#else
.section __TEXT,__text
.globl _ge_madd
.p2align 4
_ge_madd:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_madd_p(%rip)
#else
jmpq *_ge_madd_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_madd,.-ge_madd
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_msub
.type ge_msub,@function
.align 16
ge_msub:
#else
.section __TEXT,__text
.globl _ge_msub
.p2align 4
_ge_msub:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_msub_p(%rip)
#else
jmpq *_ge_msub_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_msub,.-ge_msub
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_add
.type ge_add,@function
.align 16
ge_add:
#else
.section __TEXT,__text
.globl _ge_add
.p2align 4
_ge_add:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_add_p(%rip)
#else
jmpq *_ge_add_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_add,.-ge_add
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_sub
.type ge_sub,@function
.align 16
ge_sub:
#else
.section __TEXT,__text
.globl _ge_sub
.p2align 4
_ge_sub:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *ge_sub_p(%rip)
#else
jmpq *_ge_sub_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size ge_sub,.-ge_sub
#endif /* __APPLE__ */
#if defined(WOLFSSL_CURVE25519_NOT_USE_ED25519)
#ifndef __APPLE__
.text
.globl curve25519_base
.type curve25519_base,@function
.align 16
curve25519_base:
#else
.section __TEXT,__text
.globl _curve25519_base
.p2align 4
_curve25519_base:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *curve25519_base_p(%rip)
#else
jmpq *_curve25519_base_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size curve25519_base,.-curve25519_base
#endif /* __APPLE__ */
#endif /* WOLFSSL_CURVE25519_NOT_USE_ED25519 */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl fe_sq2
.type fe_sq2,@function
.align 16
fe_sq2:
#else
.section __TEXT,__text
.globl _fe_sq2
.p2align 4
_fe_sq2:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_sq2_p(%rip)
#else
jmpq *_fe_sq2_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_sq2,.-fe_sq2
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl fe_invert_nct
.type fe_invert_nct,@function
.align 16
fe_invert_nct:
#else
.section __TEXT,__text
.globl _fe_invert_nct
.p2align 4
_fe_invert_nct:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *fe_invert_nct_p(%rip)
#else
jmpq *_fe_invert_nct_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size fe_invert_nct,.-fe_invert_nct
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl sc_reduce
.type sc_reduce,@function
.align 16
sc_reduce:
#else
.section __TEXT,__text
.globl _sc_reduce
.p2align 4
_sc_reduce:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *sc_reduce_p(%rip)
#else
jmpq *_sc_reduce_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size sc_reduce,.-sc_reduce
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl sc_muladd
.type sc_muladd,@function
.align 16
sc_muladd:
#else
.section __TEXT,__text
.globl _sc_muladd
.p2align 4
_sc_muladd:
#endif /* __APPLE__ */
#ifndef __APPLE__
jmpq *sc_muladd_p(%rip)
#else
jmpq *_sc_muladd_p(%rip)
#endif /* __APPLE__ */
#ifndef __APPLE__
.size sc_muladd,.-sc_muladd
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifndef __APPLE__
.data
.type cpuFlagsSet, @object
.size cpuFlagsSet,4
cpuFlagsSet:
.long 0
#else
.section __DATA,__data
.p2align 3
_cpuFlagsSet:
.long 0
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type intelFlags, @object
.size intelFlags,4
intelFlags:
.long 0
#else
.section __DATA,__data
.p2align 3
_intelFlags:
.long 0
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_cmov_table_p, @object
.size fe_cmov_table_p,8
fe_cmov_table_p:
.quad fe_cmov_table_x64
#else
.section __DATA,__data
.p2align 3
_fe_cmov_table_p:
.quad _fe_cmov_table_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_mul_p, @object
.size fe_mul_p,8
fe_mul_p:
.quad fe_mul_x64
#else
.section __DATA,__data
.p2align 3
_fe_mul_p:
.quad _fe_mul_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_sq_p, @object
.size fe_sq_p,8
fe_sq_p:
.quad fe_sq_x64
#else
.section __DATA,__data
.p2align 3
_fe_sq_p:
.quad _fe_sq_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_mul121666_p, @object
.size fe_mul121666_p,8
fe_mul121666_p:
.quad fe_mul121666_x64
#else
.section __DATA,__data
.p2align 3
_fe_mul121666_p:
.quad _fe_mul121666_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_invert_p, @object
.size fe_invert_p,8
fe_invert_p:
.quad fe_invert_x64
#else
.section __DATA,__data
.p2align 3
_fe_invert_p:
.quad _fe_invert_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type curve25519_p, @object
.size curve25519_p,8
curve25519_p:
.quad curve25519_x64
#else
.section __DATA,__data
.p2align 3
_curve25519_p:
.quad _curve25519_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_pow22523_p, @object
.size fe_pow22523_p,8
fe_pow22523_p:
.quad fe_pow22523_x64
#else
.section __DATA,__data
.p2align 3
_fe_pow22523_p:
.quad _fe_pow22523_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_p1p1_to_p2_p, @object
.size ge_p1p1_to_p2_p,8
ge_p1p1_to_p2_p:
.quad ge_p1p1_to_p2_x64
#else
.section __DATA,__data
.p2align 3
_ge_p1p1_to_p2_p:
.quad _ge_p1p1_to_p2_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_p1p1_to_p3_p, @object
.size ge_p1p1_to_p3_p,8
ge_p1p1_to_p3_p:
.quad ge_p1p1_to_p3_x64
#else
.section __DATA,__data
.p2align 3
_ge_p1p1_to_p3_p:
.quad _ge_p1p1_to_p3_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_p2_dbl_p, @object
.size ge_p2_dbl_p,8
ge_p2_dbl_p:
.quad ge_p2_dbl_x64
#else
.section __DATA,__data
.p2align 3
_ge_p2_dbl_p:
.quad _ge_p2_dbl_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_madd_p, @object
.size ge_madd_p,8
ge_madd_p:
.quad ge_madd_x64
#else
.section __DATA,__data
.p2align 3
_ge_madd_p:
.quad _ge_madd_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_msub_p, @object
.size ge_msub_p,8
ge_msub_p:
.quad ge_msub_x64
#else
.section __DATA,__data
.p2align 3
_ge_msub_p:
.quad _ge_msub_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_add_p, @object
.size ge_add_p,8
ge_add_p:
.quad ge_add_x64
#else
.section __DATA,__data
.p2align 3
_ge_add_p:
.quad _ge_add_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type ge_sub_p, @object
.size ge_sub_p,8
ge_sub_p:
.quad ge_sub_x64
#else
.section __DATA,__data
.p2align 3
_ge_sub_p:
.quad _ge_sub_x64
#endif /* __APPLE__ */
#if defined(WOLFSSL_CURVE25519_NOT_USE_ED25519)
#ifndef __APPLE__
.data
.type curve25519_base_p, @object
.size curve25519_base_p,8
curve25519_base_p:
.quad curve25519_base_x64
#else
.section __DATA,__data
.p2align 3
_curve25519_base_p:
.quad _curve25519_base_x64
#endif /* __APPLE__ */
#endif /* WOLFSSL_CURVE25519_NOT_USE_ED25519 */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.data
.type fe_sq2_p, @object
.size fe_sq2_p,8
fe_sq2_p:
.quad fe_sq2_x64
#else
.section __DATA,__data
.p2align 3
_fe_sq2_p:
.quad _fe_sq2_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type fe_invert_nct_p, @object
.size fe_invert_nct_p,8
fe_invert_nct_p:
.quad fe_invert_nct_x64
#else
.section __DATA,__data
.p2align 3
_fe_invert_nct_p:
.quad _fe_invert_nct_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type sc_reduce_p, @object
.size sc_reduce_p,8
sc_reduce_p:
.quad sc_reduce_x64
#else
.section __DATA,__data
.p2align 3
_sc_reduce_p:
.quad _sc_reduce_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
.type sc_muladd_p, @object
.size sc_muladd_p,8
sc_muladd_p:
.quad sc_muladd_x64
#else
.section __DATA,__data
.p2align 3
_sc_muladd_p:
.quad _sc_muladd_x64
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifndef __APPLE__
.text
.globl fe_cmov_table_x64
.type fe_cmov_table_x64,@function
.align 16
fe_cmov_table_x64:
#else
.section __TEXT,__text
.globl _fe_cmov_table_x64
.p2align 4
_fe_cmov_table_x64:
#endif /* __APPLE__ */
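# Constant-time lookup of a precomputed point. The signed index in dl is
# reduced to its absolute value in r15b; every candidate table entry is
# loaded and selected with cmove, so memory access and control flow do
# not depend on the secret index. For a negative index, the first two
# field elements are swapped and the third is negated mod p.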
pushq %r12
pushq %r13
pushq %r14
pushq %r15
movq %rdx, %rcx
movsbq %cl, %rax
cdq
xorb %dl, %al
subb %dl, %al
movb %al, %r15b
movq $0x01, %rax
xorq %rdx, %rdx
xorq %r8, %r8
xorq %r9, %r9
movq $0x01, %r10
xorq %r11, %r11
xorq %r12, %r12
xorq %r13, %r13
cmpb $0x01, %r15b
movq (%rsi), %r14
cmoveq %r14, %rax
movq 8(%rsi), %r14
cmoveq %r14, %rdx
movq 16(%rsi), %r14
cmoveq %r14, %r8
movq 24(%rsi), %r14
cmoveq %r14, %r9
movq 32(%rsi), %r14
cmoveq %r14, %r10
movq 40(%rsi), %r14
cmoveq %r14, %r11
movq 48(%rsi), %r14
cmoveq %r14, %r12
movq 56(%rsi), %r14
cmoveq %r14, %r13
cmpb $2, %r15b
movq 96(%rsi), %r14
cmoveq %r14, %rax
movq 104(%rsi), %r14
cmoveq %r14, %rdx
movq 112(%rsi), %r14
cmoveq %r14, %r8
movq 120(%rsi), %r14
cmoveq %r14, %r9
movq 128(%rsi), %r14
cmoveq %r14, %r10
movq 136(%rsi), %r14
cmoveq %r14, %r11
movq 144(%rsi), %r14
cmoveq %r14, %r12
movq 152(%rsi), %r14
cmoveq %r14, %r13
cmpb $3, %r15b
movq 192(%rsi), %r14
cmoveq %r14, %rax
movq 200(%rsi), %r14
cmoveq %r14, %rdx
movq 208(%rsi), %r14
cmoveq %r14, %r8
movq 216(%rsi), %r14
cmoveq %r14, %r9
movq 224(%rsi), %r14
cmoveq %r14, %r10
movq 232(%rsi), %r14
cmoveq %r14, %r11
movq 240(%rsi), %r14
cmoveq %r14, %r12
movq 248(%rsi), %r14
cmoveq %r14, %r13
cmpb $4, %r15b
movq 288(%rsi), %r14
cmoveq %r14, %rax
movq 296(%rsi), %r14
cmoveq %r14, %rdx
movq 304(%rsi), %r14
cmoveq %r14, %r8
movq 312(%rsi), %r14
cmoveq %r14, %r9
movq 320(%rsi), %r14
cmoveq %r14, %r10
movq 328(%rsi), %r14
cmoveq %r14, %r11
movq 336(%rsi), %r14
cmoveq %r14, %r12
movq 344(%rsi), %r14
cmoveq %r14, %r13
cmpb $5, %r15b
movq 384(%rsi), %r14
cmoveq %r14, %rax
movq 392(%rsi), %r14
cmoveq %r14, %rdx
movq 400(%rsi), %r14
cmoveq %r14, %r8
movq 408(%rsi), %r14
cmoveq %r14, %r9
movq 416(%rsi), %r14
cmoveq %r14, %r10
movq 424(%rsi), %r14
cmoveq %r14, %r11
movq 432(%rsi), %r14
cmoveq %r14, %r12
movq 440(%rsi), %r14
cmoveq %r14, %r13
cmpb $6, %r15b
movq 480(%rsi), %r14
cmoveq %r14, %rax
movq 488(%rsi), %r14
cmoveq %r14, %rdx
movq 496(%rsi), %r14
cmoveq %r14, %r8
movq 504(%rsi), %r14
cmoveq %r14, %r9
movq 512(%rsi), %r14
cmoveq %r14, %r10
movq 520(%rsi), %r14
cmoveq %r14, %r11
movq 528(%rsi), %r14
cmoveq %r14, %r12
movq 536(%rsi), %r14
cmoveq %r14, %r13
cmpb $7, %r15b
movq 576(%rsi), %r14
cmoveq %r14, %rax
movq 584(%rsi), %r14
cmoveq %r14, %rdx
movq 592(%rsi), %r14
cmoveq %r14, %r8
movq 600(%rsi), %r14
cmoveq %r14, %r9
movq 608(%rsi), %r14
cmoveq %r14, %r10
movq 616(%rsi), %r14
cmoveq %r14, %r11
movq 624(%rsi), %r14
cmoveq %r14, %r12
movq 632(%rsi), %r14
cmoveq %r14, %r13
cmpb $8, %r15b
movq 672(%rsi), %r14
cmoveq %r14, %rax
movq 680(%rsi), %r14
cmoveq %r14, %rdx
movq 688(%rsi), %r14
cmoveq %r14, %r8
movq 696(%rsi), %r14
cmoveq %r14, %r9
movq 704(%rsi), %r14
cmoveq %r14, %r10
movq 712(%rsi), %r14
cmoveq %r14, %r11
movq 720(%rsi), %r14
cmoveq %r14, %r12
movq 728(%rsi), %r14
cmoveq %r14, %r13
cmpb $0x00, %cl
movq %rax, %r14
cmovlq %r10, %rax
cmovlq %r14, %r10
movq %rdx, %r14
cmovlq %r11, %rdx
cmovlq %r14, %r11
movq %r8, %r14
cmovlq %r12, %r8
cmovlq %r14, %r12
movq %r9, %r14
cmovlq %r13, %r9
cmovlq %r14, %r13
movq %rax, (%rdi)
movq %rdx, 8(%rdi)
movq %r8, 16(%rdi)
movq %r9, 24(%rdi)
movq %r10, 32(%rdi)
movq %r11, 40(%rdi)
movq %r12, 48(%rdi)
movq %r13, 56(%rdi)
xorq %rax, %rax
xorq %rdx, %rdx
xorq %r8, %r8
xorq %r9, %r9
cmpb $0x01, %r15b
movq 64(%rsi), %r14
cmoveq %r14, %rax
movq 72(%rsi), %r14
cmoveq %r14, %rdx
movq 80(%rsi), %r14
cmoveq %r14, %r8
movq 88(%rsi), %r14
cmoveq %r14, %r9
cmpb $2, %r15b
movq 160(%rsi), %r14
cmoveq %r14, %rax
movq 168(%rsi), %r14
cmoveq %r14, %rdx
movq 176(%rsi), %r14
cmoveq %r14, %r8
movq 184(%rsi), %r14
cmoveq %r14, %r9
cmpb $3, %r15b
movq 256(%rsi), %r14
cmoveq %r14, %rax
movq 264(%rsi), %r14
cmoveq %r14, %rdx
movq 272(%rsi), %r14
cmoveq %r14, %r8
movq 280(%rsi), %r14
cmoveq %r14, %r9
cmpb $4, %r15b
movq 352(%rsi), %r14
cmoveq %r14, %rax
movq 360(%rsi), %r14
cmoveq %r14, %rdx
movq 368(%rsi), %r14
cmoveq %r14, %r8
movq 376(%rsi), %r14
cmoveq %r14, %r9
cmpb $5, %r15b
movq 448(%rsi), %r14
cmoveq %r14, %rax
movq 456(%rsi), %r14
cmoveq %r14, %rdx
movq 464(%rsi), %r14
cmoveq %r14, %r8
movq 472(%rsi), %r14
cmoveq %r14, %r9
cmpb $6, %r15b
movq 544(%rsi), %r14
cmoveq %r14, %rax
movq 552(%rsi), %r14
cmoveq %r14, %rdx
movq 560(%rsi), %r14
cmoveq %r14, %r8
movq 568(%rsi), %r14
cmoveq %r14, %r9
cmpb $7, %r15b
movq 640(%rsi), %r14
cmoveq %r14, %rax
movq 648(%rsi), %r14
cmoveq %r14, %rdx
movq 656(%rsi), %r14
cmoveq %r14, %r8
movq 664(%rsi), %r14
cmoveq %r14, %r9
cmpb $8, %r15b
movq 736(%rsi), %r14
cmoveq %r14, %rax
movq 744(%rsi), %r14
cmoveq %r14, %rdx
movq 752(%rsi), %r14
cmoveq %r14, %r8
movq 760(%rsi), %r14
cmoveq %r14, %r9
movq $-19, %r10
movq $-1, %r11
movq $-1, %r12
movq $0x7fffffffffffffff, %r13
subq %rax, %r10
sbbq %rdx, %r11
sbbq %r8, %r12
sbbq %r9, %r13
cmpb $0x00, %cl
cmovlq %r10, %rax
cmovlq %r11, %rdx
cmovlq %r12, %r8
cmovlq %r13, %r9
movq %rax, 64(%rdi)
movq %rdx, 72(%rdi)
movq %r8, 80(%rdi)
movq %r9, 88(%rdi)
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_cmov_table_x64,.-fe_cmov_table_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_mul_x64
.type fe_mul_x64,@function
.align 16
fe_mul_x64:
#else
.section __TEXT,__text
.globl _fe_mul_x64
.p2align 4
_fe_mul_x64:
#endif /* __APPLE__ */
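# out = a * b mod p: 4x4-limb schoolbook multiply into a 512-bit
# product, then reduce using 2^256 ≡ 38 (mod p).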
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
movq %rdx, %rcx
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r8
movq %rdx, %r9
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
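# Reduce the 512-bit product: fold the upper four limbs back in with
# weight 38 (= 2 * 19, since 2^256 ≡ 38 mod p), then fold bit 255 of
# the intermediate result back in with weight 19 (2^255 ≡ 19 mod p).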
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r11
movq %rdx, %rbx
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbx, %r8
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
movq $0x7fffffffffffffff, %rbx
movq %r11, %rax
sarq $63, %rax
andq $19, %rax
andq %rbx, %r11
addq %rax, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_mul_x64,.-fe_mul_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_sq_x64
.type fe_sq_x64,@function
.align 16
fe_sq_x64:
#else
.section __TEXT,__text
.globl _fe_sq_x64
.p2align 4
_fe_sq_x64:
#endif /* __APPLE__ */
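# out = a^2 mod p: compute each off-diagonal limb product once and
# double the partial sum, add the diagonal squares, then reduce as in
# fe_mul_x64.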
pushq %r12
pushq %r13
pushq %r14
pushq %r15
# Square
# A[0] * A[1]
movq (%rsi), %rax
mulq 8(%rsi)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq (%rsi), %rax
mulq 16(%rsi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq (%rsi), %rax
mulq 24(%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 8(%rsi), %rax
mulq 16(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 8(%rsi), %rax
mulq 24(%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 16(%rsi), %rax
mulq 24(%rsi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq (%rsi), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %r15
# A[1] * A[1]
movq 8(%rsi), %rax
mulq %rax
addq %r15, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %r15
# A[2] * A[2]
movq 16(%rsi), %rax
mulq %rax
addq %r15, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %r15
# A[3] * A[3]
movq 24(%rsi), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %r15, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r15
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %r15, %r10
movq %rdx, %r15
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %r15, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
movq $0x7fffffffffffffff, %r15
movq %r10, %rax
sarq $63, %rax
andq $19, %rax
andq %r15, %r10
addq %rax, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Store
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_sq_x64,.-fe_sq_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_sq_n_x64
.type fe_sq_n_x64,@function
.align 16
fe_sq_n_x64:
#else
.section __TEXT,__text
.globl _fe_sq_n_x64
.p2align 4
_fe_sq_n_x64:
#endif /* __APPLE__ */
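# Square the input n times, with n passed in rdx. The loop counter is
# kept in cl (decb), so counts must stay below 256; the inversion
# chains below pass at most 0x63.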
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
movq %rdx, %rcx
L_fe_sq_n_x64:
# Square
# A[0] * A[1]
movq (%rsi), %rax
mulq 8(%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq (%rsi), %rax
mulq 16(%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq (%rsi), %rax
mulq 24(%rsi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 8(%rsi), %rax
mulq 16(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 8(%rsi), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 16(%rsi), %rax
mulq 24(%rsi)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq (%rsi), %rax
mulq %rax
movq %rax, %r8
movq %rdx, %rbx
# A[1] * A[1]
movq 8(%rsi), %rax
mulq %rax
addq %rbx, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 16(%rsi), %rax
mulq %rax
addq %rbx, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 24(%rsi), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbx, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r11
movq %rdx, %rbx
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbx, %r8
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
decb %cl
jnz L_fe_sq_n_x64
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_sq_n_x64,.-fe_sq_n_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_mul121666_x64
.type fe_mul121666_x64,@function
.align 16
fe_mul121666_x64:
#else
.section __TEXT,__text
.globl _fe_mul121666_x64
.p2align 4
_fe_mul121666_x64:
#endif /* __APPLE__ */
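# Multiply by the curve constant 121666 = 0x1db42 = (486662 + 2) / 4,
# used in the x-only Montgomery doubling step.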
pushq %r12
# Multiply by 121666
movq $0x1db42, %rax
mulq (%rsi)
xorq %r10, %r10
movq %rax, %r8
movq %rdx, %r9
movq $0x1db42, %rax
mulq 8(%rsi)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
movq $0x1db42, %rax
mulq 16(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
movq $0x1db42, %rax
mulq 24(%rsi)
movq $0x7fffffffffffffff, %rcx
addq %rax, %r11
adcq %rdx, %r12
shldq $0x01, %r11, %r12
andq %rcx, %r11
movq $19, %rax
mulq %r12
addq %rax, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %r12
repz retq
#ifndef __APPLE__
.size fe_mul121666_x64,.-fe_mul121666_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_invert_x64
.type fe_invert_x64,@function
.align 16
fe_invert_x64:
#else
.section __TEXT,__text
.globl _fe_invert_x64
.p2align 4
_fe_invert_x64:
#endif /* __APPLE__ */
subq $0x90, %rsp
# Invert
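# Fermat inversion: out = in^(p-2) = in^(2^255 - 21) mod p, computed
# with the standard addition chain of squarings (fe_sq_x64 /
# fe_sq_n_x64) and multiplies (fe_mul_x64). The 0x90-byte frame holds
# four 32-byte temporaries, with the caller's rdi/rsi saved at
# 128/136(%rsp).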
movq %rdi, 128(%rsp)
movq %rsi, 136(%rsp)
movq %rsp, %rdi
movq 136(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq 136(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
movq 128(%rsp), %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq 136(%rsp), %rsi
movq 128(%rsp), %rdi
addq $0x90, %rsp
repz retq
#if defined(WOLFSSL_CURVE25519_NOT_USE_ED25519)
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
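# Per the label, the precomputed u-coordinate of [2]G, used to seed the
# second ladder point below.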
L_curve25519_base_x64_x2:
.quad 0x5cae469cdd684efb, 0x8f3f5ced1e350b5c
.quad 0xd9750c687d157114, 0x20d342d51873f1b7
#ifndef __APPLE__
.text
.globl curve25519_base_x64
.type curve25519_base_x64,@function
.align 16
curve25519_base_x64:
#else
.section __TEXT,__text
.globl _curve25519_base_x64
.p2align 4
_curve25519_base_x64:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
subq $0xa8, %rsp
xorq %r15, %r15
movq %rdi, 160(%rsp)
# Set base point x
movq $9, (%rdi)
movq $0x00, 8(%rdi)
movq $0x00, 16(%rdi)
movq $0x00, 24(%rdi)
# Set one
movq $0x01, (%rsp)
movq $0x00, 8(%rsp)
movq $0x00, 16(%rsp)
movq $0x00, 24(%rsp)
movq 0+L_curve25519_base_x64_x2(%rip), %rcx
movq 8+L_curve25519_base_x64_x2(%rip), %r8
movq 16+L_curve25519_base_x64_x2(%rip), %r9
movq 24+L_curve25519_base_x64_x2(%rip), %r10
# Set one
movq $0x01, 32(%rsp)
movq $0x00, 40(%rsp)
movq $0x00, 48(%rsp)
movq $0x00, 56(%rsp)
movq %rcx, 64(%rsp)
movq %r8, 72(%rsp)
movq %r9, 80(%rsp)
movq %r10, 88(%rsp)
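# Montgomery ladder over the scalar bits. After clamping, bit 254 is
# always set, so iteration starts at bit 253 (0xfd) with the pair
# (G, [2]G) held in projective (X : Z) form.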
movq $0xfd, %rbp
L_curve25519_base_x64_bits:
movq %rbp, %r8
movq %rbp, %rcx
andq $63, %rcx
shrq $6, %r8
movq (%rsi,%r8,8), %rbx
shrq %cl, %rbx
andq $0x01, %rbx
xorq %rbx, %r15
negq %r15
# Conditional Swap
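# Lazy constant-time swap: r15 holds the previous scalar bit, so the
# mask (negq of prev XOR current) is all-ones only when the bit
# changed; the masked XOR then swaps both (X : Z) pairs without any
# secret-dependent branch.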
movq (%rdi), %rcx
movq 8(%rdi), %r8
movq 16(%rdi), %r9
movq 24(%rdi), %r10
movq (%rsp), %r11
movq 8(%rsp), %r12
movq 16(%rsp), %r13
movq 24(%rsp), %r14
xorq 64(%rsp), %rcx
xorq 72(%rsp), %r8
xorq 80(%rsp), %r9
xorq 88(%rsp), %r10
xorq 32(%rsp), %r11
xorq 40(%rsp), %r12
xorq 48(%rsp), %r13
xorq 56(%rsp), %r14
andq %r15, %rcx
andq %r15, %r8
andq %r15, %r9
andq %r15, %r10
andq %r15, %r11
andq %r15, %r12
andq %r15, %r13
andq %r15, %r14
xorq %rcx, (%rdi)
xorq %r8, 8(%rdi)
xorq %r9, 16(%rdi)
xorq %r10, 24(%rdi)
xorq %r11, (%rsp)
xorq %r12, 8(%rsp)
xorq %r13, 16(%rsp)
xorq %r14, 24(%rsp)
xorq %rcx, 64(%rsp)
xorq %r8, 72(%rsp)
xorq %r9, 80(%rsp)
xorq %r10, 88(%rsp)
xorq %r11, 32(%rsp)
xorq %r12, 40(%rsp)
xorq %r13, 48(%rsp)
xorq %r14, 56(%rsp)
movq %rbx, %r15
# Add-Sub
# Add
movq (%rdi), %rcx
movq 8(%rdi), %r8
movq 16(%rdi), %r9
movq 24(%rdi), %r10
movq %rcx, %r11
addq (%rsp), %rcx
movq %r8, %r12
adcq 8(%rsp), %r8
movq %r9, %r13
adcq 16(%rsp), %r9
movq %r10, %r14
adcq 24(%rsp), %r10
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r10, %rbx
imulq $19, %rbx
btr $63, %r10
# Sub modulus (if overflow)
addq %rbx, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Sub
subq (%rsp), %r11
sbbq 8(%rsp), %r12
sbbq 16(%rsp), %r13
sbbq 24(%rsp), %r14
sbbq %rbx, %rbx
shldq $0x01, %r14, %rbx
imulq $-19, %rbx
btr $63, %r14
# Add modulus (if underflow)
subq %rbx, %r11
sbbq $0x00, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
movq %r11, 128(%rsp)
movq %r12, 136(%rsp)
movq %r13, 144(%rsp)
movq %r14, 152(%rsp)
# Add-Sub
# Add
movq 64(%rsp), %rcx
movq 72(%rsp), %r8
movq 80(%rsp), %r9
movq 88(%rsp), %r10
movq %rcx, %r11
addq 32(%rsp), %rcx
movq %r8, %r12
adcq 40(%rsp), %r8
movq %r9, %r13
adcq 48(%rsp), %r9
movq %r10, %r14
adcq 56(%rsp), %r10
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r10, %rbx
imulq $19, %rbx
btr $63, %r10
# Sub modulus (if overflow)
addq %rbx, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Sub
subq 32(%rsp), %r11
sbbq 40(%rsp), %r12
sbbq 48(%rsp), %r13
sbbq 56(%rsp), %r14
sbbq %rbx, %rbx
shldq $0x01, %r14, %rbx
imulq $-19, %rbx
btr $63, %r14
# Add modulus (if underflow)
subq %rbx, %r11
sbbq $0x00, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
movq %rcx, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq %r10, 56(%rsp)
movq %r11, 96(%rsp)
movq %r12, 104(%rsp)
movq %r13, 112(%rsp)
movq %r14, 120(%rsp)
# Multiply
# A[0] * B[0]
movq 128(%rsp), %rax
mulq 32(%rsp)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 136(%rsp), %rax
mulq 32(%rsp)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq 128(%rsp), %rax
mulq 40(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 144(%rsp), %rax
mulq 32(%rsp)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 136(%rsp), %rax
mulq 40(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq 128(%rsp), %rax
mulq 48(%rsp)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 152(%rsp), %rax
mulq 32(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 144(%rsp), %rax
mulq 40(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 136(%rsp), %rax
mulq 48(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq 128(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 152(%rsp), %rax
mulq 40(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 144(%rsp), %rax
mulq 48(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 136(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 152(%rsp), %rax
mulq 48(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 144(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 152(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq %r10, 56(%rsp)
# Multiply
# A[0] * B[0]
movq (%rdi), %rax
mulq 96(%rsp)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 8(%rdi), %rax
mulq 96(%rsp)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq (%rdi), %rax
mulq 104(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 16(%rdi), %rax
mulq 96(%rsp)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 8(%rdi), %rax
mulq 104(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq (%rdi), %rax
mulq 112(%rsp)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 24(%rdi), %rax
mulq 96(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 16(%rdi), %rax
mulq 104(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 8(%rdi), %rax
mulq 112(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq (%rdi), %rax
mulq 120(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 24(%rdi), %rax
mulq 104(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 16(%rdi), %rax
mulq 112(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 8(%rdi), %rax
mulq 120(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 24(%rdi), %rax
mulq 112(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 16(%rdi), %rax
mulq 120(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 24(%rdi), %rax
mulq 120(%rsp)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, (%rsp)
movq %r8, 8(%rsp)
movq %r9, 16(%rsp)
movq %r10, 24(%rsp)
# Square
# A[0] * A[1]
movq 128(%rsp), %rax
mulq 136(%rsp)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq 128(%rsp), %rax
mulq 144(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq 128(%rsp), %rax
mulq 152(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 136(%rsp), %rax
mulq 144(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 136(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 144(%rsp), %rax
mulq 152(%rsp)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq 128(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbx
# A[1] * A[1]
movq 136(%rsp), %rax
mulq %rax
addq %rbx, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 144(%rsp), %rax
mulq %rax
addq %rbx, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 152(%rsp), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %rbx, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 96(%rsp)
movq %r8, 104(%rsp)
movq %r9, 112(%rsp)
movq %r10, 120(%rsp)
# Square
# A[0] * A[1]
movq (%rdi), %rax
mulq 8(%rdi)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq (%rdi), %rax
mulq 16(%rdi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq (%rdi), %rax
mulq 24(%rdi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 8(%rdi), %rax
mulq 16(%rdi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 8(%rdi), %rax
mulq 24(%rdi)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 16(%rdi), %rax
mulq 24(%rdi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq (%rdi), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbx
# A[1] * A[1]
movq 8(%rdi), %rax
mulq %rax
addq %rbx, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 16(%rdi), %rax
mulq %rax
addq %rbx, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 24(%rdi), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %rbx, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 128(%rsp)
movq %r8, 136(%rsp)
movq %r9, 144(%rsp)
movq %r10, 152(%rsp)
# Add-Sub
# Add
movq (%rsp), %rcx
movq 8(%rsp), %r8
movq 16(%rsp), %r9
movq 24(%rsp), %r10
movq %rcx, %r11
addq 32(%rsp), %rcx
movq %r8, %r12
adcq 40(%rsp), %r8
movq %r9, %r13
adcq 48(%rsp), %r9
movq %r10, %r14
adcq 56(%rsp), %r10
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r10, %rbx
imulq $19, %rbx
btr $63, %r10
# Sub modulus (if overflow)
addq %rbx, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Sub
subq 32(%rsp), %r11
sbbq 40(%rsp), %r12
sbbq 48(%rsp), %r13
sbbq 56(%rsp), %r14
sbbq %rbx, %rbx
shldq $0x01, %r14, %rbx
imulq $-19, %rbx
btr $63, %r14
# Add modulus (if underflow)
subq %rbx, %r11
sbbq $0x00, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
movq %rcx, 64(%rsp)
movq %r8, 72(%rsp)
movq %r9, 80(%rsp)
movq %r10, 88(%rsp)
movq %r11, 32(%rsp)
movq %r12, 40(%rsp)
movq %r13, 48(%rsp)
movq %r14, 56(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
# Sub
movq 128(%rsp), %rcx
movq 136(%rsp), %r8
movq 144(%rsp), %r9
movq 152(%rsp), %r10
subq 96(%rsp), %rcx
sbbq 104(%rsp), %r8
sbbq 112(%rsp), %r9
sbbq 120(%rsp), %r10
sbbq %rbx, %rbx
shldq $0x01, %r10, %rbx
imulq $-19, %rbx
btr $63, %r10
# Add modulus (if underflow)
subq %rbx, %rcx
sbbq $0x00, %r8
sbbq $0x00, %r9
sbbq $0x00, %r10
movq %rcx, 128(%rsp)
movq %r8, 136(%rsp)
movq %r9, 144(%rsp)
movq %r10, 152(%rsp)
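# The Square blocks exploit symmetry: only the off-diagonal products
# A[i] * A[j] with i < j are computed, doubled with a single carry chain,
# and the diagonal squares A[i]^2 are then added in:
#   a^2 = sum_i A[i]^2 * 2^(128*i) + 2 * sum_{i<j} A[i]*A[j] * 2^(64*(i+j))
# This takes 10 mulq instructions instead of the 16 of a full multiply;
# the same fold-by-38 reduction as in the Multiply blocks follows.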
# Square
# A[0] * A[1]
movq 32(%rsp), %rax
mulq 40(%rsp)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq 32(%rsp), %rax
mulq 48(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq 32(%rsp), %rax
mulq 56(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 40(%rsp), %rax
mulq 48(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 40(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 48(%rsp), %rax
mulq 56(%rsp)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq 32(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbx
# A[1] * A[1]
movq 40(%rsp), %rax
mulq %rax
addq %rbx, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 48(%rsp), %rax
mulq %rax
addq %rbx, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 56(%rsp), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %rbx, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq %r10, 56(%rsp)
# Square
# A[0] * A[1]
movq 64(%rsp), %rax
mulq 72(%rsp)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq 64(%rsp), %rax
mulq 80(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq 64(%rsp), %rax
mulq 88(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 72(%rsp), %rax
mulq 80(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 72(%rsp), %rax
mulq 88(%rsp)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 80(%rsp), %rax
mulq 88(%rsp)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq 64(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbx
# A[1] * A[1]
movq 72(%rsp), %rax
mulq %rax
addq %rbx, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 80(%rsp), %rax
mulq %rax
addq %rbx, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 88(%rsp), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %rbx, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 64(%rsp)
movq %r8, 72(%rsp)
movq %r9, 80(%rsp)
movq %r10, 88(%rsp)
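# 0x1db42 is 121666 = (486662 + 2) / 4, the a24 constant of curve25519
# (A = 486662). The next block computes a24 * E and folds the addition of
# the other squared term into the same carry chain, matching the
# z2 = E * (AA + a24 * E) update of the RFC 7748 ladder step.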
# Multiply by 121666 and add
movq $0x1db42, %rax
mulq 128(%rsp)
xorq %r9, %r9
movq %rax, %rcx
movq %rdx, %r8
movq $0x1db42, %rax
mulq 136(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
movq $0x1db42, %rax
mulq 144(%rsp)
xorq %r12, %r12
addq %rax, %r9
adcq %rdx, %r10
movq $0x1db42, %rax
mulq 152(%rsp)
movq $0x7fffffffffffffff, %r11
addq %rax, %r10
adcq %rdx, %r12
addq 96(%rsp), %rcx
adcq 104(%rsp), %r8
adcq 112(%rsp), %r9
adcq 120(%rsp), %r10
adcq $0x00, %r12
shldq $0x01, %r10, %r12
andq %r11, %r10
movq $19, %rax
mulq %r12
addq %rax, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
movq %rcx, 96(%rsp)
movq %r8, 104(%rsp)
movq %r9, 112(%rsp)
movq %r10, 120(%rsp)
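# Base-point variant: the generic multiplication by the input u-coordinate
# is specialised to a multiply by 9, the u-coordinate of the curve25519
# base point. Four limbs times a small constant need only four mulq
# instructions plus one 19-fold for the bits above position 255.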
# Multiply by 9
movq $9, %rax
mulq 32(%rsp)
xorq %r9, %r9
movq %rax, %rcx
movq %rdx, %r8
movq $9, %rax
mulq 40(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
movq $9, %rax
mulq 48(%rsp)
xorq %r12, %r12
addq %rax, %r9
adcq %rdx, %r10
movq $9, %rax
mulq 56(%rsp)
movq $0x7fffffffffffffff, %r11
addq %rax, %r10
adcq %rdx, %r12
shldq $0x01, %r10, %r12
andq %r11, %r10
movq $19, %rax
mulq %r12
addq %rax, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
movq %rcx, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
movq %r10, 56(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, (%rsp)
movq %r8, 8(%rsp)
movq %r9, 16(%rsp)
movq %r10, 24(%rsp)
decq %rbp
cmpq $3, %rbp
jge L_curve25519_base_x64_bits
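# The loop above covered scalar bits 254..3. Turn the last ladder bit held
# in %r15 into an all-ones/zero mask for one final conditional swap, then
# run the remaining three ladder steps with no swaps at all: a clamped
# X25519 scalar has its low three bits cleared, so those iterations would
# never swap anyway.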
negq %r15
# Conditional Swap
movq (%rdi), %rcx
movq 8(%rdi), %r8
movq 16(%rdi), %r9
movq 24(%rdi), %r10
movq (%rsp), %r11
movq 8(%rsp), %r12
movq 16(%rsp), %r13
movq 24(%rsp), %r14
xorq 64(%rsp), %rcx
xorq 72(%rsp), %r8
xorq 80(%rsp), %r9
xorq 88(%rsp), %r10
xorq 32(%rsp), %r11
xorq 40(%rsp), %r12
xorq 48(%rsp), %r13
xorq 56(%rsp), %r14
andq %r15, %rcx
andq %r15, %r8
andq %r15, %r9
andq %r15, %r10
andq %r15, %r11
andq %r15, %r12
andq %r15, %r13
andq %r15, %r14
xorq %rcx, (%rdi)
xorq %r8, 8(%rdi)
xorq %r9, 16(%rdi)
xorq %r10, 24(%rdi)
xorq %r11, (%rsp)
xorq %r12, 8(%rsp)
xorq %r13, 16(%rsp)
xorq %r14, 24(%rsp)
xorq %rcx, 64(%rsp)
xorq %r8, 72(%rsp)
xorq %r9, 80(%rsp)
xorq %r10, 88(%rsp)
xorq %r11, 32(%rsp)
xorq %r12, 40(%rsp)
xorq %r13, 48(%rsp)
xorq %r14, 56(%rsp)
L_curve25519_base_x64_3:
# Add-Sub
# Add
movq (%rdi), %rcx
movq 8(%rdi), %r8
movq 16(%rdi), %r9
movq 24(%rdi), %r10
movq %rcx, %r11
addq (%rsp), %rcx
movq %r8, %r12
adcq 8(%rsp), %r8
movq %r9, %r13
adcq 16(%rsp), %r9
movq %r10, %r14
adcq 24(%rsp), %r10
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r10, %rbx
imulq $19, %rbx
btr $63, %r10
# Sub modulus (if overflow)
addq %rbx, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Sub
subq (%rsp), %r11
sbbq 8(%rsp), %r12
sbbq 16(%rsp), %r13
sbbq 24(%rsp), %r14
sbbq %rbx, %rbx
shldq $0x01, %r14, %rbx
imulq $-19, %rbx
btr $63, %r14
# Add modulus (if underflow)
subq %rbx, %r11
sbbq $0x00, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
movq %r11, 128(%rsp)
movq %r12, 136(%rsp)
movq %r13, 144(%rsp)
movq %r14, 152(%rsp)
# Square
# A[0] * A[1]
movq 128(%rsp), %rax
mulq 136(%rsp)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq 128(%rsp), %rax
mulq 144(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq 128(%rsp), %rax
mulq 152(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 136(%rsp), %rax
mulq 144(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 136(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 144(%rsp), %rax
mulq 152(%rsp)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq 128(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbx
# A[1] * A[1]
movq 136(%rsp), %rax
mulq %rax
addq %rbx, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 144(%rsp), %rax
mulq %rax
addq %rbx, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 152(%rsp), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %rbx, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 96(%rsp)
movq %r8, 104(%rsp)
movq %r9, 112(%rsp)
movq %r10, 120(%rsp)
# Square
# A[0] * A[1]
movq (%rdi), %rax
mulq 8(%rdi)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq (%rdi), %rax
mulq 16(%rdi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq (%rdi), %rax
mulq 24(%rdi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 8(%rdi), %rax
mulq 16(%rdi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 8(%rdi), %rax
mulq 24(%rdi)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 16(%rdi), %rax
mulq 24(%rdi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq (%rdi), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbx
# A[1] * A[1]
movq 8(%rdi), %rax
mulq %rax
addq %rbx, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %rbx
# A[2] * A[2]
movq 16(%rdi), %rax
mulq %rax
addq %rbx, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %rbx
# A[3] * A[3]
movq 24(%rdi), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %rbx, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, 128(%rsp)
movq %r8, 136(%rsp)
movq %r9, 144(%rsp)
movq %r10, 152(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
# Sub
movq 128(%rsp), %rcx
movq 136(%rsp), %r8
movq 144(%rsp), %r9
movq 152(%rsp), %r10
subq 96(%rsp), %rcx
sbbq 104(%rsp), %r8
sbbq 112(%rsp), %r9
sbbq 120(%rsp), %r10
sbbq %rbx, %rbx
shldq $0x01, %r10, %rbx
imulq $-19, %rbx
btr $63, %r10
# Add modulus (if underflow)
subq %rbx, %rcx
sbbq $0x00, %r8
sbbq $0x00, %r9
sbbq $0x00, %r10
movq %rcx, 128(%rsp)
movq %r8, 136(%rsp)
movq %r9, 144(%rsp)
movq %r10, 152(%rsp)
# Multiply by 121666 and add
movq $0x1db42, %rax
mulq 128(%rsp)
xorq %r9, %r9
movq %rax, %rcx
movq %rdx, %r8
movq $0x1db42, %rax
mulq 136(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
movq $0x1db42, %rax
mulq 144(%rsp)
xorq %r12, %r12
addq %rax, %r9
adcq %rdx, %r10
movq $0x1db42, %rax
mulq 152(%rsp)
movq $0x7fffffffffffffff, %r11
addq %rax, %r10
adcq %rdx, %r12
addq 96(%rsp), %rcx
adcq 104(%rsp), %r8
adcq 112(%rsp), %r9
adcq 120(%rsp), %r10
adcq $0x00, %r12
shldq $0x01, %r10, %r12
andq %r11, %r10
movq $19, %rax
mulq %r12
addq %rax, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
movq %rcx, 96(%rsp)
movq %r8, 104(%rsp)
movq %r9, 112(%rsp)
movq %r10, 120(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
# Store
movq %rcx, (%rsp)
movq %r8, 8(%rsp)
movq %r9, 16(%rsp)
movq %r10, 24(%rsp)
decq %rbp
jge L_curve25519_base_x64_3
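# Invert z2 via Fermat's little theorem: z2^(p-2) mod p, p - 2 = 2^255 - 21.
# The call sequence below is the usual curve25519 addition chain: build
# z^11 and z^(2^5-1), then z^(2^10-1), z^(2^20-1), z^(2^40-1), z^(2^50-1),
# z^(2^100-1), z^(2^200-1) and z^(2^250-1) with fe_sq_x64 / fe_sq_n_x64 and
# fe_mul_x64, and finish with five more squarings and one multiply by z^11:
#   (2^250 - 1) * 2^5 + 11 = 2^255 - 21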
# Invert
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
movq %rsp, %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
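# The inverse z2^(p-2) now sits at 0(%rsp). Reload the output pointer that
# was saved at 160(%rsp) and multiply the inverse into the x-coordinate,
# giving the affine result x2 * z2^(-1) straight into the caller's buffer.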
movq 160(%rsp), %rdi
# Multiply
# A[0] * B[0]
movq (%rsp), %rax
mulq (%rdi)
movq %rax, %rcx
movq %rdx, %r8
# A[0] * B[1]
movq 8(%rsp), %rax
mulq (%rdi)
xorq %r9, %r9
addq %rax, %r8
adcq %rdx, %r9
# A[1] * B[0]
movq (%rsp), %rax
mulq 8(%rdi)
xorq %r10, %r10
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %r10
# A[0] * B[2]
movq 16(%rsp), %rax
mulq (%rdi)
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[1]
movq 8(%rsp), %rax
mulq 8(%rdi)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[2] * B[0]
movq (%rsp), %rax
mulq 16(%rdi)
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[3]
movq 24(%rsp), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[2]
movq 16(%rsp), %rax
mulq 8(%rdi)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[1]
movq 8(%rsp), %rax
mulq 16(%rdi)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[3] * B[0]
movq (%rsp), %rax
mulq 24(%rdi)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * B[3]
movq 24(%rsp), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[2]
movq 16(%rsp), %rax
mulq 16(%rdi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[1]
movq 8(%rsp), %rax
mulq 24(%rdi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[3]
movq 24(%rsp), %rax
mulq 16(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[2]
movq 16(%rsp), %rax
mulq 24(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[3]
movq 24(%rsp), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %rbx, %r10
movq %rdx, %rbx
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %rbx, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
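# Make the result canonical in [0, p): first fold bit 255 back in as 19,
# then test r >= p without branching by adding 19 and walking the carry
# chain through a scratch register without storing; the top bit of that
# sum is set exactly when r + 19 reaches 2^255, i.e. when r >= p.
# Sketch (comment only):
#   r += 19 & -(r >> 255);  r &= (1 << 255) - 1;
#   m  = (r + 19) >> 255;                  /* 1 iff r >= p   */
#   r  = (r + 19 * m) & ((1 << 255) - 1);  /* r - p when set */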
movq $0x7fffffffffffffff, %rbx
movq %r10, %rax
sarq $63, %rax
andq $19, %rax
andq %rbx, %r10
addq %rax, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
movq $0x7fffffffffffffff, %rax
movq %rcx, %rdx
addq $19, %rdx
movq %r8, %rdx
adcq $0x00, %rdx
movq %r9, %rdx
adcq $0x00, %rdx
movq %r10, %rdx
adcq $0x00, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rax, %r10
addq %rdx, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Clear bit 255 again: the conditional add of 19 can carry back into it
andq %rax, %r10
# Store
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
xorq %rax, %rax
addq $0xa8, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size curve25519_base_x64,.-curve25519_base_x64
#endif /* __APPLE__ */
#endif /* WOLFSSL_CURVE25519_NOT_USE_ED25519 */
#ifndef __APPLE__
.text
.globl curve25519_x64
.type curve25519_x64,@function
.align 16
curve25519_x64:
#else
.section __TEXT,__text
.globl _curve25519_x64
.p2align 4
_curve25519_x64:
#endif /* __APPLE__ */
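# curve25519_x64(r, n, a): System V AMD64 arguments as used below are
# %rdi = 32-byte output u-coordinate, %rsi = 32-byte clamped scalar and
# %rdx = 32-byte input u-coordinate. %rdx is moved to %r8 right away
# because mulq clobbers %rdx:%rax.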
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %r8
subq $0xb0, %rsp
xorq %rbx, %rbx
movq %rdi, 168(%rsp)
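# Montgomery ladder initialisation: (x2, z2) = (1, 0), (x3, z3) = (u, 1).
# x2 lives in the output buffer at (%rdi), z2 at 0(%rsp), z3 at 32(%rsp)
# and x3 at 64(%rsp).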
# Set one
movq $0x01, (%rdi)
movq $0x00, 8(%rdi)
movq $0x00, 16(%rdi)
movq $0x00, 24(%rdi)
# Set zero
movq $0x00, (%rsp)
movq $0x00, 8(%rsp)
movq $0x00, 16(%rsp)
movq $0x00, 24(%rsp)
# Set one
movq $0x01, 32(%rsp)
movq $0x00, 40(%rsp)
movq $0x00, 48(%rsp)
movq $0x00, 56(%rsp)
# Copy
movq (%r8), %rcx
movq 8(%r8), %r9
movq 16(%r8), %r10
movq 24(%r8), %r11
movq %rcx, 64(%rsp)
movq %r9, 72(%rsp)
movq %r10, 80(%rsp)
movq %r11, 88(%rsp)
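# Walk the scalar bits from 254 down to 3. Each iteration extracts bit k
# (word k >> 6, bit k & 63 of %rsi), XORs it with the previous bit to
# decide whether to swap, and keeps the current bit for the next round:
# the usual "swap on bit difference" trick that avoids swapping back
# after every ladder step.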
movq $0xfe, %r9
L_curve25519_x64_bits:
movq %r9, 160(%rsp)
movq %r9, %rcx
andq $63, %rcx
shrq $6, %r9
movq (%rsi,%r9,8), %rbp
shrq %cl, %rbp
andq $0x01, %rbp
xorq %rbp, %rbx
negq %rbx
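# Constant-time conditional swap. With mask = 0 or all-ones:
#   t = (a ^ b) & mask;  a ^= t;  b ^= t;
# swaps (x2, z2) with (x3, z3) exactly when the mask is all-ones, with a
# memory access pattern that does not depend on the scalar bit.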
# Conditional Swap
movq (%rdi), %rcx
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq (%rsp), %r12
movq 8(%rsp), %r13
movq 16(%rsp), %r14
movq 24(%rsp), %r15
xorq 64(%rsp), %rcx
xorq 72(%rsp), %r9
xorq 80(%rsp), %r10
xorq 88(%rsp), %r11
xorq 32(%rsp), %r12
xorq 40(%rsp), %r13
xorq 48(%rsp), %r14
xorq 56(%rsp), %r15
andq %rbx, %rcx
andq %rbx, %r9
andq %rbx, %r10
andq %rbx, %r11
andq %rbx, %r12
andq %rbx, %r13
andq %rbx, %r14
andq %rbx, %r15
xorq %rcx, (%rdi)
xorq %r9, 8(%rdi)
xorq %r10, 16(%rdi)
xorq %r11, 24(%rdi)
xorq %r12, (%rsp)
xorq %r13, 8(%rsp)
xorq %r14, 16(%rsp)
xorq %r15, 24(%rsp)
xorq %rcx, 64(%rsp)
xorq %r9, 72(%rsp)
xorq %r10, 80(%rsp)
xorq %r11, 88(%rsp)
xorq %r12, 32(%rsp)
xorq %r13, 40(%rsp)
xorq %r14, 48(%rsp)
xorq %r15, 56(%rsp)
movq %rbp, %rbx
# Add-Sub
# Add
movq (%rdi), %rcx
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq %rcx, %r12
addq (%rsp), %rcx
movq %r9, %r13
adcq 8(%rsp), %r9
movq %r10, %r14
adcq 16(%rsp), %r10
movq %r11, %r15
adcq 24(%rsp), %r11
movq $0x00, %rbp
adcq $0x00, %rbp
shldq $0x01, %r11, %rbp
imulq $19, %rbp
btr $63, %r11
# Sub modulus (if overflow)
addq %rbp, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq (%rsp), %r12
sbbq 8(%rsp), %r13
sbbq 16(%rsp), %r14
sbbq 24(%rsp), %r15
sbbq %rbp, %rbp
shldq $0x01, %r15, %rbp
imulq $-19, %rbp
btr $63, %r15
# Add modulus (if underflow)
subq %rbp, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %rcx, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
movq %r12, 128(%rsp)
movq %r13, 136(%rsp)
movq %r14, 144(%rsp)
movq %r15, 152(%rsp)
# Add-Sub
# Add
movq 64(%rsp), %rcx
movq 72(%rsp), %r9
movq 80(%rsp), %r10
movq 88(%rsp), %r11
movq %rcx, %r12
addq 32(%rsp), %rcx
movq %r9, %r13
adcq 40(%rsp), %r9
movq %r10, %r14
adcq 48(%rsp), %r10
movq %r11, %r15
adcq 56(%rsp), %r11
movq $0x00, %rbp
adcq $0x00, %rbp
shldq $0x01, %r11, %rbp
imulq $19, %rbp
btr $63, %r11
# Sub modulus (if overflow)
addq %rbp, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq 32(%rsp), %r12
sbbq 40(%rsp), %r13
sbbq 48(%rsp), %r14
sbbq 56(%rsp), %r15
sbbq %rbp, %rbp
shldq $0x01, %r15, %rbp
imulq $-19, %rbp
btr $63, %r15
# Add modulus (if underflow)
subq %rbp, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %rcx, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
movq %r12, 96(%rsp)
movq %r13, 104(%rsp)
movq %r14, 112(%rsp)
movq %r15, 120(%rsp)
# Multiply
# A[0] * B[0]
movq 128(%rsp), %rax
mulq 32(%rsp)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 136(%rsp), %rax
mulq 32(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq 128(%rsp), %rax
mulq 40(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 144(%rsp), %rax
mulq 32(%rsp)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 136(%rsp), %rax
mulq 40(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq 128(%rsp), %rax
mulq 48(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 152(%rsp), %rax
mulq 32(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 144(%rsp), %rax
mulq 40(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 136(%rsp), %rax
mulq 48(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq 128(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 152(%rsp), %rax
mulq 40(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 144(%rsp), %rax
mulq 48(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 136(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 152(%rsp), %rax
mulq 48(%rsp)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 144(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 152(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
# Multiply
# A[0] * B[0]
movq (%rdi), %rax
mulq 96(%rsp)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 8(%rdi), %rax
mulq 96(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq (%rdi), %rax
mulq 104(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 16(%rdi), %rax
mulq 96(%rsp)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 8(%rdi), %rax
mulq 104(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq (%rdi), %rax
mulq 112(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 24(%rdi), %rax
mulq 96(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 16(%rdi), %rax
mulq 104(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 8(%rdi), %rax
mulq 112(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq (%rdi), %rax
mulq 120(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 24(%rdi), %rax
mulq 104(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 16(%rdi), %rax
mulq 112(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 8(%rdi), %rax
mulq 120(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 24(%rdi), %rax
mulq 112(%rsp)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 16(%rdi), %rax
mulq 120(%rsp)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 24(%rdi), %rax
mulq 120(%rsp)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, (%rsp)
movq %r9, 8(%rsp)
movq %r10, 16(%rsp)
movq %r11, 24(%rsp)
# Square
# A[0] * A[1]
movq 128(%rsp), %rax
mulq 136(%rsp)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq 128(%rsp), %rax
mulq 144(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq 128(%rsp), %rax
mulq 152(%rsp)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 136(%rsp), %rax
mulq 144(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 136(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 144(%rsp), %rax
mulq 152(%rsp)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq 128(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbp
# A[1] * A[1]
movq 136(%rsp), %rax
mulq %rax
addq %rbp, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbp
# A[2] * A[2]
movq 144(%rsp), %rax
mulq %rax
addq %rbp, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbp
# A[3] * A[3]
movq 152(%rsp), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbp, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
# Square
# A[0] * A[1]
movq (%rdi), %rax
mulq 8(%rdi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq (%rdi), %rax
mulq 16(%rdi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq (%rdi), %rax
mulq 24(%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 8(%rdi), %rax
mulq 16(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 8(%rdi), %rax
mulq 24(%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 16(%rdi), %rax
mulq 24(%rdi)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq (%rdi), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbp
# A[1] * A[1]
movq 8(%rdi), %rax
mulq %rax
addq %rbp, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbp
# A[2] * A[2]
movq 16(%rdi), %rax
mulq %rax
addq %rbp, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbp
# A[3] * A[3]
movq 24(%rdi), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbp, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
# Add-Sub
# Add
movq (%rsp), %rcx
movq 8(%rsp), %r9
movq 16(%rsp), %r10
movq 24(%rsp), %r11
movq %rcx, %r12
addq 32(%rsp), %rcx
movq %r9, %r13
adcq 40(%rsp), %r9
movq %r10, %r14
adcq 48(%rsp), %r10
movq %r11, %r15
adcq 56(%rsp), %r11
movq $0x00, %rbp
adcq $0x00, %rbp
shldq $0x01, %r11, %rbp
imulq $19, %rbp
btr $63, %r11
# Sub modulus (if overflow)
addq %rbp, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq 32(%rsp), %r12
sbbq 40(%rsp), %r13
sbbq 48(%rsp), %r14
sbbq 56(%rsp), %r15
sbbq %rbp, %rbp
shldq $0x01, %r15, %rbp
imulq $-19, %rbp
btr $63, %r15
# Add modulus (if underflow)
subq %rbp, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %rcx, 64(%rsp)
movq %r9, 72(%rsp)
movq %r10, 80(%rsp)
movq %r11, 88(%rsp)
movq %r12, 32(%rsp)
movq %r13, 40(%rsp)
movq %r14, 48(%rsp)
movq %r15, 56(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
# Sub
movq 128(%rsp), %rcx
movq 136(%rsp), %r9
movq 144(%rsp), %r10
movq 152(%rsp), %r11
subq 96(%rsp), %rcx
sbbq 104(%rsp), %r9
sbbq 112(%rsp), %r10
sbbq 120(%rsp), %r11
sbbq %rbp, %rbp
shldq $0x01, %r11, %rbp
imulq $-19, %rbp
btr $63, %r11
# Add modulus (if underflow)
subq %rbp, %rcx
sbbq $0x00, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
movq %rcx, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
# Square
# A[0] * A[1]
movq 32(%rsp), %rax
mulq 40(%rsp)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq 32(%rsp), %rax
mulq 48(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq 32(%rsp), %rax
mulq 56(%rsp)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 40(%rsp), %rax
mulq 48(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 40(%rsp), %rax
mulq 56(%rsp)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 48(%rsp), %rax
mulq 56(%rsp)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq 32(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbp
# A[1] * A[1]
movq 40(%rsp), %rax
mulq %rax
addq %rbp, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbp
# A[2] * A[2]
movq 48(%rsp), %rax
mulq %rax
addq %rbp, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbp
# A[3] * A[3]
movq 56(%rsp), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbp, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
# Square
# A[0] * A[1]
movq 64(%rsp), %rax
mulq 72(%rsp)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq 64(%rsp), %rax
mulq 80(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq 64(%rsp), %rax
mulq 88(%rsp)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 72(%rsp), %rax
mulq 80(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 72(%rsp), %rax
mulq 88(%rsp)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 80(%rsp), %rax
mulq 88(%rsp)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq 64(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbp
# A[1] * A[1]
movq 72(%rsp), %rax
mulq %rax
addq %rbp, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbp
# A[2] * A[2]
movq 80(%rsp), %rax
mulq %rax
addq %rbp, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbp
# A[3] * A[3]
movq 88(%rsp), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbp, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 64(%rsp)
movq %r9, 72(%rsp)
movq %r10, 80(%rsp)
movq %r11, 88(%rsp)
# Multiply by 121666 and add
movq $0x1db42, %rax
mulq 128(%rsp)
xorq %r10, %r10
movq %rax, %rcx
movq %rdx, %r9
movq $0x1db42, %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
movq $0x1db42, %rax
mulq 144(%rsp)
xorq %r13, %r13
addq %rax, %r10
adcq %rdx, %r11
movq $0x1db42, %rax
mulq 152(%rsp)
movq $0x7fffffffffffffff, %r12
addq %rax, %r11
adcq %rdx, %r13
addq 96(%rsp), %rcx
adcq 104(%rsp), %r9
adcq 112(%rsp), %r10
adcq 120(%rsp), %r11
adcq $0x00, %r13
shldq $0x01, %r11, %r13
andq %r12, %r11
movq $19, %rax
mulq %r13
addq %rax, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq %rcx, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
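# Here the generic ladder multiplies by the caller's u-coordinate, still
# addressed through %r8, where the base-point variant multiplies by 9:
# this is the z3 = x1 * (DA - CB)^2 update in RFC 7748 notation.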
# Multiply
# A[0] * B[0]
movq 32(%rsp), %rax
mulq (%r8)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 40(%rsp), %rax
mulq (%r8)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq 32(%rsp), %rax
mulq 8(%r8)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 48(%rsp), %rax
mulq (%r8)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 40(%rsp), %rax
mulq 8(%r8)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq 32(%rsp), %rax
mulq 16(%r8)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 56(%rsp), %rax
mulq (%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 48(%rsp), %rax
mulq 8(%r8)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 40(%rsp), %rax
mulq 16(%r8)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq 32(%rsp), %rax
mulq 24(%r8)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 56(%rsp), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 48(%rsp), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 40(%rsp), %rax
mulq 24(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 56(%rsp), %rax
mulq 16(%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 48(%rsp), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 56(%rsp), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, (%rsp)
movq %r9, 8(%rsp)
movq %r10, 16(%rsp)
movq %r11, 24(%rsp)
movq 160(%rsp), %r9
decq %r9
cmpq $3, %r9
jge L_curve25519_x64_bits
movq $2, 160(%rsp)
negq %rbx
# Conditional Swap
movq (%rdi), %rcx
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq (%rsp), %r12
movq 8(%rsp), %r13
movq 16(%rsp), %r14
movq 24(%rsp), %r15
xorq 64(%rsp), %rcx
xorq 72(%rsp), %r9
xorq 80(%rsp), %r10
xorq 88(%rsp), %r11
xorq 32(%rsp), %r12
xorq 40(%rsp), %r13
xorq 48(%rsp), %r14
xorq 56(%rsp), %r15
andq %rbx, %rcx
andq %rbx, %r9
andq %rbx, %r10
andq %rbx, %r11
andq %rbx, %r12
andq %rbx, %r13
andq %rbx, %r14
andq %rbx, %r15
xorq %rcx, (%rdi)
xorq %r9, 8(%rdi)
xorq %r10, 16(%rdi)
xorq %r11, 24(%rdi)
xorq %r12, (%rsp)
xorq %r13, 8(%rsp)
xorq %r14, 16(%rsp)
xorq %r15, 24(%rsp)
xorq %rcx, 64(%rsp)
xorq %r9, 72(%rsp)
xorq %r10, 80(%rsp)
xorq %r11, 88(%rsp)
xorq %r12, 32(%rsp)
xorq %r13, 40(%rsp)
xorq %r14, 48(%rsp)
xorq %r15, 56(%rsp)
L_curve25519_x64_3:
# Add-Sub
# Add
movq (%rdi), %rcx
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq %rcx, %r12
addq (%rsp), %rcx
movq %r9, %r13
adcq 8(%rsp), %r9
movq %r10, %r14
adcq 16(%rsp), %r10
movq %r11, %r15
adcq 24(%rsp), %r11
movq $0x00, %rbp
adcq $0x00, %rbp
shldq $0x01, %r11, %rbp
imulq $19, %rbp
btr $63, %r11
# Sub modulus (if overflow)
addq %rbp, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq (%rsp), %r12
sbbq 8(%rsp), %r13
sbbq 16(%rsp), %r14
sbbq 24(%rsp), %r15
sbbq %rbp, %rbp
shldq $0x01, %r15, %rbp
imulq $-19, %rbp
btr $63, %r15
# Add modulus (if underflow)
subq %rbp, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %rcx, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
movq %r12, 128(%rsp)
movq %r13, 136(%rsp)
movq %r14, 144(%rsp)
movq %r15, 152(%rsp)
# Square
# A[0] * A[1]
movq 128(%rsp), %rax
mulq 136(%rsp)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq 128(%rsp), %rax
mulq 144(%rsp)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq 128(%rsp), %rax
mulq 152(%rsp)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 136(%rsp), %rax
mulq 144(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 136(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 144(%rsp), %rax
mulq 152(%rsp)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq 128(%rsp), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbp
# A[1] * A[1]
movq 136(%rsp), %rax
mulq %rax
addq %rbp, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbp
# A[2] * A[2]
movq 144(%rsp), %rax
mulq %rax
addq %rbp, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbp
# A[3] * A[3]
movq 152(%rsp), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbp, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
# Square
# A[0] * A[1]
movq (%rdi), %rax
mulq 8(%rdi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * A[2]
movq (%rdi), %rax
mulq 16(%rdi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[0] * A[3]
movq (%rdi), %rax
mulq 24(%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * A[2]
movq 8(%rdi), %rax
mulq 16(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * A[3]
movq 8(%rdi), %rax
mulq 24(%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[2] * A[3]
movq 16(%rdi), %rax
mulq 24(%rdi)
xorq %r14, %r14
addq %rax, %r13
adcq %rdx, %r14
# Double
xorq %r15, %r15
addq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq $0x00, %r15
# A[0] * A[0]
movq (%rdi), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %rbp
# A[1] * A[1]
movq 8(%rdi), %rax
mulq %rax
addq %rbp, %r9
adcq %rax, %r10
adcq $0x00, %rdx
movq %rdx, %rbp
# A[2] * A[2]
movq 16(%rdi), %rax
mulq %rax
addq %rbp, %r11
adcq %rax, %r12
adcq $0x00, %rdx
movq %rdx, %rbp
# A[3] * A[3]
movq 24(%rdi), %rax
mulq %rax
addq %rax, %r14
adcq %rdx, %r15
addq %rbp, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
# Sub
movq 128(%rsp), %rcx
movq 136(%rsp), %r9
movq 144(%rsp), %r10
movq 152(%rsp), %r11
subq 96(%rsp), %rcx
sbbq 104(%rsp), %r9
sbbq 112(%rsp), %r10
sbbq 120(%rsp), %r11
sbbq %rbp, %rbp
shldq $0x01, %r11, %rbp
imulq $-19, %rbp
btr $63, %r11
# Add modulus (if underflow)
subq %rbp, %rcx
sbbq $0x00, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
movq %rcx, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
# Multiply by 121666 and add
movq $0x1db42, %rax
mulq 128(%rsp)
xorq %r10, %r10
movq %rax, %rcx
movq %rdx, %r9
movq $0x1db42, %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
movq $0x1db42, %rax
mulq 144(%rsp)
xorq %r13, %r13
addq %rax, %r10
adcq %rdx, %r11
movq $0x1db42, %rax
mulq 152(%rsp)
movq $0x7fffffffffffffff, %r12
addq %rax, %r11
adcq %rdx, %r13
addq 96(%rsp), %rcx
adcq 104(%rsp), %r9
adcq 112(%rsp), %r10
adcq 120(%rsp), %r11
adcq $0x00, %r13
shldq $0x01, %r11, %r13
andq %r12, %r11
movq $19, %rax
mulq %r13
addq %rax, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq %rcx, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rax
mulq 128(%rsp)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 104(%rsp), %rax
mulq 128(%rsp)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq 96(%rsp), %rax
mulq 136(%rsp)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 112(%rsp), %rax
mulq 128(%rsp)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 104(%rsp), %rax
mulq 136(%rsp)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq 96(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 120(%rsp), %rax
mulq 128(%rsp)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 112(%rsp), %rax
mulq 136(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 104(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq 96(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 120(%rsp), %rax
mulq 136(%rsp)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 112(%rsp), %rax
mulq 144(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 104(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 120(%rsp), %rax
mulq 144(%rsp)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 112(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 120(%rsp), %rax
mulq 152(%rsp)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
# Store
movq %rcx, (%rsp)
movq %r9, 8(%rsp)
movq %r10, 16(%rsp)
movq %r11, 24(%rsp)
decq 160(%rsp)
jge L_curve25519_x64_3
# Invert
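# Compute z^-1 = z^(p-2) = z^(2^255 - 21) by Fermat's little theorem,
# using a fixed square-and-multiply addition chain over the stack
# temporaries at 32, 64, 96 and 128(%rsp); the result lands at 0(%rsp).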
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
movq %rsp, %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq 168(%rsp), %rdi
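# Multiply the ladder's x output (already in the caller's buffer, whose
# pointer was parked at 168(%rsp)) by the computed z^-1, then reduce to
# canonical form and store the final result.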
# Multiply
# A[0] * B[0]
movq (%rsp), %rax
mulq (%rdi)
movq %rax, %rcx
movq %rdx, %r9
# A[0] * B[1]
movq 8(%rsp), %rax
mulq (%rdi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq (%rsp), %rax
mulq 8(%rdi)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 16(%rsp), %rax
mulq (%rdi)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 8(%rsp), %rax
mulq 8(%rdi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq (%rsp), %rax
mulq 16(%rdi)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 24(%rsp), %rax
mulq (%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 16(%rsp), %rax
mulq 8(%rdi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 8(%rsp), %rax
mulq 16(%rdi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq (%rsp), %rax
mulq 24(%rdi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 24(%rsp), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 16(%rsp), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 8(%rsp), %rax
mulq 24(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 24(%rsp), %rax
mulq 16(%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 16(%rsp), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 24(%rsp), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
movq $38, %rax
mulq %r15
addq %rax, %r11
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %rbp
shldq $0x01, %r11, %rdx
imulq $19, %rdx, %rdx
andq %rbp, %r11
movq %rdx, %rbp
movq $38, %rax
mulq %r12
xorq %r12, %r12
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
adcq %rdx, %r14
addq %rbp, %rcx
adcq %r12, %r9
adcq %r13, %r10
adcq %r14, %r11
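# Canonical reduction into [0, p): first fold bit 255 back in as 19,
# then trial-add 19 through copies of the limbs (mov leaves the carry
# flag intact) to test whether the value is still >= p; if so, 19 is
# added and bit 255 cleared, i.e. p is subtracted.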
movq $0x7fffffffffffffff, %rbp
movq %r11, %rax
sarq $63, %rax
andq $19, %rax
andq %rbp, %r11
addq %rax, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq $0x7fffffffffffffff, %rax
movq %rcx, %rdx
addq $19, %rdx
movq %r9, %rdx
adcq $0x00, %rdx
movq %r10, %rdx
adcq $0x00, %rdx
movq %r11, %rdx
adcq $0x00, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rax, %r11
addq %rdx, %rcx
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Store
movq %rcx, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
xorq %rax, %rax
addq $0xb0, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
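# rep ret: two-byte return that avoids a branch-misprediction penalty
# on older AMD CPUs when ret is the target of a jump.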
repz retq
#ifndef __APPLE__
.size curve25519_x64,.-curve25519_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_pow22523_x64
.type fe_pow22523_x64,@function
.align 16
fe_pow22523_x64:
#else
.section __TEXT,__text
.globl _fe_pow22523_x64
.p2align 4
_fe_pow22523_x64:
#endif /* __APPLE__ */
subq $0x70, %rsp
# pow22523
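# Compute z^((p-5)/8) = z^(2^252 - 3), the exponent used when taking
# square roots during Ed25519 point decompression. The output and
# input pointers are parked at 96 and 104(%rsp) across the helper
# calls.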
movq %rdi, 96(%rsp)
movq %rsi, 104(%rsp)
movq %rsp, %rdi
movq 104(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq 104(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_x64@plt
#else
callq _fe_sq_n_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_x64@plt
#else
callq _fe_sq_x64
#endif /* __APPLE__ */
movq 96(%rsp), %rdi
movq %rsp, %rsi
movq 104(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_x64@plt
#else
callq _fe_mul_x64
#endif /* __APPLE__ */
movq 104(%rsp), %rsi
movq 96(%rsp), %rdi
addq $0x70, %rsp
repz retq
#ifndef __APPLE__
.size fe_pow22523_x64,.-fe_pow22523_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p2_x64
.type ge_p1p1_to_p2_x64,@function
.align 16
ge_p1p1_to_p2_x64:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p2_x64
.p2align 4
_ge_p1p1_to_p2_x64:
#endif /* __APPLE__ */
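# Convert a point from completed (p1p1) to projective (p2) coordinates:
# X2 = X*T, Y2 = Y*Z, Z2 = Z*T. Field elements are 32 bytes each, laid
# out in the order X, Y, Z, T.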
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
subq $16, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rsi, %rcx
addq $0x60, %rcx
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $0x40, %rsi
addq $0x40, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
movq %rsi, %rcx
subq $32, %rcx
subq $32, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $16, %rsp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_p1p1_to_p2_x64,.-ge_p1p1_to_p2_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p3_x64
.type ge_p1p1_to_p3_x64,@function
.align 16
ge_p1p1_to_p3_x64:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p3_x64
.p2align 4
_ge_p1p1_to_p3_x64:
#endif /* __APPLE__ */
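# Convert a point from completed (p1p1) to extended (p3) coordinates:
# X3 = X*T, Y3 = Y*Z, Z3 = Z*T, T3 = X*Y, reusing the multiply pattern
# of the p2 conversion above.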
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
subq $16, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rsi, %rcx
addq $0x60, %rcx
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
movq %rsi, %rcx
addq $32, %rcx
addq $0x60, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $0x40, %rsi
subq $0x40, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
movq %rsi, %rcx
addq $32, %rcx
addq $32, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r9
movq %rdx, %r10
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $16, %rsp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_p1p1_to_p3_x64,.-ge_p1p1_to_p3_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p2_dbl_x64
.type ge_p2_dbl_x64,@function
.align 16
ge_p2_dbl_x64:
#else
.section __TEXT,__text
.globl _ge_p2_dbl_x64
.p2align 4
_ge_p2_dbl_x64:
#endif /* __APPLE__ */
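# Double a point given in projective (p2) coordinates into a completed
# (p1p1) result: square X, Y and (X+Y), compute 2*Z^2, and combine them
# with add/sub steps per the usual twisted Edwards doubling formulas.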
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
subq $16, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
addq $0x40, %rdi
# Square
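# Schoolbook squaring: the six distinct cross products A[i]*A[j],
# i < j, are computed once and doubled, then the diagonal squares
# A[i]*A[i] are added in.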
# A[0] * A[1]
movq (%rsi), %rax
mulq 8(%rsi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * A[2]
movq (%rsi), %rax
mulq 16(%rsi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[0] * A[3]
movq (%rsi), %rax
mulq 24(%rsi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# A[1] * A[2]
movq 8(%rsi), %rax
mulq 16(%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * A[3]
movq 8(%rsi), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
# A[2] * A[3]
movq 16(%rsi), %rax
mulq 24(%rsi)
xorq %r15, %r15
addq %rax, %r14
adcq %rdx, %r15
# Double
xorq %rbx, %rbx
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq %r15, %r15
adcq $0x00, %rbx
# A[0] * A[0]
movq (%rsi), %rax
mulq %rax
movq %rax, %r9
movq %rdx, %r8
# A[1] * A[1]
movq 8(%rsi), %rax
mulq %rax
addq %r8, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %r8
# A[2] * A[2]
movq 16(%rsi), %rax
mulq %rax
addq %r8, %r12
adcq %rax, %r13
adcq $0x00, %rdx
movq %rdx, %r8
# A[3] * A[3]
movq 24(%rsi), %rax
mulq %rax
addq %rax, %r15
adcq %rdx, %rbx
addq %r8, %r14
adcq $0x00, %r15
adcq $0x00, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $32, %rsi
# Square
# A[0] * A[1]
movq (%rsi), %rax
mulq 8(%rsi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * A[2]
movq (%rsi), %rax
mulq 16(%rsi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[0] * A[3]
movq (%rsi), %rax
mulq 24(%rsi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# A[1] * A[2]
movq 8(%rsi), %rax
mulq 16(%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * A[3]
movq 8(%rsi), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
# A[2] * A[3]
movq 16(%rsi), %rax
mulq 24(%rsi)
xorq %r15, %r15
addq %rax, %r14
adcq %rdx, %r15
# Double
xorq %rbx, %rbx
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq %r15, %r15
adcq $0x00, %rbx
# A[0] * A[0]
movq (%rsi), %rax
mulq %rax
movq %rax, %r9
movq %rdx, %r8
# A[1] * A[1]
movq 8(%rsi), %rax
mulq %rax
addq %r8, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %r8
# A[2] * A[2]
movq 16(%rsi), %rax
mulq %rax
addq %r8, %r12
adcq %rax, %r13
adcq $0x00, %rdx
movq %rdx, %r8
# A[3] * A[3]
movq 24(%rsi), %rax
mulq %rax
addq %rax, %r15
adcq %rdx, %rbx
addq %r8, %r14
adcq $0x00, %r15
adcq $0x00, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store (deferred: the Add-Sub below writes both results)
movq %rdi, %rsi
subq $32, %rdi
# Add-Sub
# Add
movq %r9, %r13
addq (%rsi), %r9
movq %r10, %r14
adcq 8(%rsi), %r10
movq %r11, %r15
adcq 16(%rsi), %r11
movq %r12, %rbx
adcq 24(%rsi), %r12
movq $0x00, %r8
adcq $0x00, %r8
shldq $0x01, %r12, %r8
imulq $19, %r8
btr $63, %r12
# Sub modulus (if overflow)
addq %r8, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Sub
subq (%rsi), %r13
sbbq 8(%rsi), %r14
sbbq 16(%rsi), %r15
sbbq 24(%rsi), %rbx
sbbq %r8, %r8
shldq $0x01, %rbx, %r8
imulq $-19, %r8
btr $63, %rbx
# Add modulus (if underflow)
subq %r8, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
movq %r13, (%rsi)
movq %r14, 8(%rsi)
movq %r15, 16(%rsi)
movq %rbx, 24(%rsi)
movq 8(%rsp), %rcx
movq %rcx, %rsi
addq $32, %rsi
subq $32, %rdi
# Add
movq (%rsi), %r9
movq 8(%rsi), %r10
addq (%rcx), %r9
movq 16(%rsi), %r11
adcq 8(%rcx), %r10
movq 24(%rsi), %r12
adcq 16(%rcx), %r11
adcq 24(%rcx), %r12
movq $0x00, %r8
adcq $0x00, %r8
shldq $0x01, %r12, %r8
imulq $19, %r8
btr $63, %r12
# Sub modulus (if overflow)
addq %r8, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
# Square
# A[0] * A[1]
movq (%rdi), %rax
mulq 8(%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * A[2]
movq (%rdi), %rax
mulq 16(%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[0] * A[3]
movq (%rdi), %rax
mulq 24(%rdi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# A[1] * A[2]
movq 8(%rdi), %rax
mulq 16(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * A[3]
movq 8(%rdi), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
# A[2] * A[3]
movq 16(%rdi), %rax
mulq 24(%rdi)
xorq %r15, %r15
addq %rax, %r14
adcq %rdx, %r15
# Double
xorq %rbx, %rbx
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq %r15, %r15
adcq $0x00, %rbx
# A[0] * A[0]
movq (%rdi), %rax
mulq %rax
movq %rax, %r9
movq %rdx, %r8
# A[1] * A[1]
movq 8(%rdi), %rax
mulq %rax
addq %r8, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %r8
# A[2] * A[2]
movq 16(%rdi), %rax
mulq %rax
addq %r8, %r12
adcq %rax, %r13
adcq $0x00, %rdx
movq %rdx, %r8
# A[3] * A[3]
movq 24(%rdi), %rax
mulq %rax
addq %rax, %r15
adcq %rdx, %rbx
addq %r8, %r14
adcq $0x00, %r15
adcq $0x00, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
# Store
movq %rdi, %rsi
addq $32, %rsi
# Sub
subq (%rsi), %r9
sbbq 8(%rsi), %r10
sbbq 16(%rsi), %r11
sbbq 24(%rsi), %r12
sbbq %r8, %r8
shldq $0x01, %r12, %r8
imulq $-19, %r8
btr $63, %r12
# Add modulus (if underflow)
subq %r8, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
sbbq $0x00, %r12
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $0x40, %rcx
# Square * 2
# A[0] * A[1]
movq (%rcx), %rax
mulq 8(%rcx)
movq %rax, %r10
movq %rdx, %r11
# A[0] * A[2]
movq (%rcx), %rax
mulq 16(%rcx)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[0] * A[3]
movq (%rcx), %rax
mulq 24(%rcx)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# A[1] * A[2]
movq 8(%rcx), %rax
mulq 16(%rcx)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[1] * A[3]
movq 8(%rcx), %rax
mulq 24(%rcx)
addq %rax, %r13
adcq %rdx, %r14
# A[2] * A[3]
movq 16(%rcx), %rax
mulq 24(%rcx)
xorq %r15, %r15
addq %rax, %r14
adcq %rdx, %r15
# Double
xorq %rbx, %rbx
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq %r14, %r14
adcq %r15, %r15
adcq $0x00, %rbx
# A[0] * A[0]
movq (%rcx), %rax
mulq %rax
movq %rax, %r9
movq %rdx, %r8
# A[1] * A[1]
movq 8(%rcx), %rax
mulq %rax
addq %r8, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %r8
# A[2] * A[2]
movq 16(%rcx), %rax
mulq %rax
addq %r8, %r12
adcq %rax, %r13
adcq $0x00, %rdx
movq %rdx, %r8
# A[3] * A[3]
movq 24(%rcx), %rax
mulq %rax
addq %rax, %r15
adcq %rdx, %rbx
addq %r8, %r14
adcq $0x00, %r15
adcq $0x00, %rbx
movq $38, %rax
mulq %rbx
addq %rax, %r12
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r12, %rdx
imulq $19, %rdx, %rdx
andq %r8, %r12
movq %rdx, %r8
movq $38, %rax
mulq %r13
xorq %r13, %r13
addq %rax, %r9
movq $38, %rax
adcq %rdx, %r13
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
adcq %rdx, %r15
addq %r8, %r9
adcq %r13, %r10
adcq %r14, %r11
adcq %r15, %r12
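# Double the squared value: shift the 255-bit result left one bit and
# fold the two bits shifted out of the top limb back in times 19
# (2^255 = 19 mod p).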
movq %r12, %rax
shldq $0x01, %r11, %r12
shldq $0x01, %r10, %r11
shldq $0x01, %r9, %r10
shlq $0x01, %r9
movq $0x7fffffffffffffff, %r8
shrq $62, %rax
andq %r8, %r12
imulq $19, %rax, %rax
addq %rax, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Store
movq %rdi, %rsi
addq $0x40, %rsi
addq $0x60, %rdi
# Sub
subq (%rsi), %r9
sbbq 8(%rsi), %r10
sbbq 16(%rsi), %r11
sbbq 24(%rsi), %r12
sbbq %r8, %r8
shldq $0x01, %r12, %r8
imulq $-19, %r8
btr $63, %r12
# Add modulus (if underflow)
subq %r8, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
sbbq $0x00, %r12
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
addq $16, %rsp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_p2_dbl_x64,.-ge_p2_dbl_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_madd_x64
.type ge_madd_x64,@function
.align 16
ge_madd_x64:
#else
.section __TEXT,__text
.globl _ge_madd_x64
.p2align 4
_ge_madd_x64:
#endif /* __APPLE__ */
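# Mixed addition producing a p1p1 result: add a precomputed point,
# stored as the triple (y+x, y-x, 2*d*x*y) with Z implicitly 1, to a
# point in extended (p3) coordinates, so no multiply by the table
# point's Z is needed.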
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rcx
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rcx, 16(%rsp)
movq %rsi, %r8
movq %rsi, %rcx
addq $32, %rcx
movq %rdi, %rsi
addq $32, %rsi
# Add-Sub
# Add
movq (%rcx), %r10
movq 8(%rcx), %r11
movq 16(%rcx), %r12
movq 24(%rcx), %r13
movq %r10, %r14
addq (%r8), %r10
movq %r11, %r15
adcq 8(%r8), %r11
movq %r12, %rbx
adcq 16(%r8), %r12
movq %r13, %rbp
adcq 24(%r8), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%r8), %r14
sbbq 8(%r8), %r15
sbbq 16(%r8), %rbx
sbbq 24(%r8), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rcx
addq $32, %rcx
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rsi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rsi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rsi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rsi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rsi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rsi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rsi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
addq $0x60, %r8
addq $32, %rcx
addq $0x60, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%r8)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%r8)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%r8)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%r8)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%r8)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%r8)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%r8)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
subq $0x40, %rcx
subq $0x60, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store (deferred: folded into the Add-Sub below)
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
subq $32, %r8
# Double
movq (%r8), %r10
movq 8(%r8), %r11
addq %r10, %r10
movq 16(%r8), %r12
adcq %r11, %r11
movq 24(%r8), %r13
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %rdi, %rsi
addq $0x60, %rsi
addq $0x40, %rdi
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_madd_x64,.-ge_madd_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_msub_x64
.type ge_msub_x64,@function
.align 16
ge_msub_x64:
#else
.section __TEXT,__text
.globl _ge_msub_x64
.p2align 4
_ge_msub_x64:
#endif /* __APPLE__ */
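# Mixed subtraction: same structure as ge_madd_x64 but with the roles
# of the table point's y+x and y-x entries exchanged and the final
# add/sub pair mirrored, which amounts to adding the negated point.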
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rcx
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rcx, 16(%rsp)
movq %rsi, %r8
movq %rsi, %rcx
addq $32, %rcx
movq %rdi, %rsi
addq $32, %rsi
# Add-Sub
# Add
movq (%rcx), %r10
movq 8(%rcx), %r11
movq 16(%rcx), %r12
movq 24(%rcx), %r13
movq %r10, %r14
addq (%r8), %r10
movq %r11, %r15
adcq 8(%r8), %r11
movq %r12, %rbx
adcq 16(%r8), %r12
movq %r13, %rbp
adcq 24(%r8), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%r8), %r14
sbbq 8(%r8), %r15
sbbq 16(%r8), %rbx
sbbq 24(%r8), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rcx
addq $32, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
addq $0x60, %r8
addq $0x40, %rcx
addq $0x40, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%r8)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%r8)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%r8)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%r8)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%r8)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%r8)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%r8)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
subq $32, %rcx
subq $0x60, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store (deferred: folded into the Add-Sub below)
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
subq $32, %r8
addq $0x40, %rdi
# Double
movq (%r8), %r10
movq 8(%r8), %r11
addq %r10, %r10
movq 16(%r8), %r12
adcq %r11, %r11
movq 24(%r8), %r13
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %rdi, %rsi
addq $32, %rsi
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_msub_x64,.-ge_msub_x64
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_add_x64
.type ge_add_x64,@function
.align 16
ge_add_x64:
#else
.section __TEXT,__text
.globl _ge_add_x64
.p2align 4
_ge_add_x64:
#endif /* __APPLE__ */
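# Full point addition producing a p1p1 result: p is in extended (p3)
# coordinates and q is evidently in cached form (stored as Y+X, Y-X,
# Z, 2*d*T), so unlike the mixed add a multiply by q's Z is required.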
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rcx
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rcx, 16(%rsp)
movq %rsi, %r8
movq %rsi, %rcx
addq $32, %rcx
movq %rdi, %rsi
addq $32, %rsi
# Add-Sub
# Add
movq (%rcx), %r10
movq 8(%rcx), %r11
movq 16(%rcx), %r12
movq 24(%rcx), %r13
movq %r10, %r14
addq (%r8), %r10
movq %r11, %r15
adcq 8(%r8), %r11
movq %r12, %rbx
adcq 16(%r8), %r12
movq %r13, %rbp
adcq 24(%r8), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%r8), %r14
sbbq 8(%r8), %r15
sbbq 16(%r8), %rbx
sbbq 24(%r8), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rcx
addq $32, %rcx
addq $32, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
addq $0x60, %r8
addq $0x40, %rcx
addq $0x40, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%r8)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%r8)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%r8)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%r8)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%r8)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%r8)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%r8)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
subq $0x60, %rcx
subq $0x60, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
subq $32, %r8
addq $0x40, %rcx
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%r8)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%r8)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%r8)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%r8)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%r8)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%r8)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%r8)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
addq $0x40, %rdi
# Double
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %rdi, %rsi
addq $32, %rsi
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_add_x64,.-ge_add_x64
#endif /* __APPLE__ */
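/* Subtract two Ed25519 points: r = p - q (x64, non-AVX2).
 *
 * Same structure as ge_add_x64 with the offsets into q swapped, so the
 * sum and difference fields of the second point trade roles.
 */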
#ifndef __APPLE__
.text
.globl ge_sub_x64
.type ge_sub_x64,@function
.align 16
ge_sub_x64:
#else
.section __TEXT,__text
.globl _ge_sub_x64
.p2align 4
_ge_sub_x64:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rcx
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rcx, 16(%rsp)
movq %rsi, %r8
movq %rsi, %rcx
addq $32, %rcx
movq %rdi, %rsi
addq $32, %rsi
# Add-Sub
# Add
movq (%rcx), %r10
movq 8(%rcx), %r11
movq 16(%rcx), %r12
movq 24(%rcx), %r13
movq %r10, %r14
addq (%r8), %r10
movq %r11, %r15
adcq 8(%r8), %r11
movq %r12, %rbx
adcq 16(%r8), %r12
movq %r13, %rbp
adcq 24(%r8), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%r8), %r14
sbbq 8(%r8), %r15
sbbq 16(%r8), %rbx
sbbq 24(%r8), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rcx
addq $32, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
addq $0x60, %r8
addq $0x60, %rcx
addq $0x40, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%r8)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%r8)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%r8)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%r8)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%r8)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%r8)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%r8)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
subq $0x40, %rcx
subq $0x60, %rdi
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%rdi)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%rdi)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%rdi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%rdi)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%rdi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%rdi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%rdi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%rdi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%rdi)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%rdi)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%rdi)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%rdi)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
subq $32, %r8
addq $32, %rcx
# Multiply
# A[0] * B[0]
movq (%rcx), %rax
mulq (%r8)
movq %rax, %r10
movq %rdx, %r11
# A[0] * B[1]
movq 8(%rcx), %rax
mulq (%r8)
xorq %r12, %r12
addq %rax, %r11
adcq %rdx, %r12
# A[1] * B[0]
movq (%rcx), %rax
mulq 8(%r8)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[0] * B[2]
movq 16(%rcx), %rax
mulq (%r8)
addq %rax, %r12
adcq %rdx, %r13
# A[1] * B[1]
movq 8(%rcx), %rax
mulq 8(%r8)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[0]
movq (%rcx), %rax
mulq 16(%r8)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[0] * B[3]
movq 24(%rcx), %rax
mulq (%r8)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[2]
movq 16(%rcx), %rax
mulq 8(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[2] * B[1]
movq 8(%rcx), %rax
mulq 16(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[0]
movq (%rcx), %rax
mulq 24(%r8)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[1] * B[3]
movq 24(%rcx), %rax
mulq 8(%r8)
xorq %rbx, %rbx
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[2]
movq 16(%rcx), %rax
mulq 16(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[3] * B[1]
movq 8(%rcx), %rax
mulq 24(%r8)
addq %rax, %r14
adcq %rdx, %r15
adcq $0x00, %rbx
# A[2] * B[3]
movq 24(%rcx), %rax
mulq 16(%r8)
xorq %rbp, %rbp
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[2]
movq 16(%rcx), %rax
mulq 24(%r8)
addq %rax, %r15
adcq %rdx, %rbx
adcq $0x00, %rbp
# A[3] * B[3]
movq 24(%rcx), %rax
mulq 24(%r8)
addq %rax, %rbx
adcq %rdx, %rbp
movq $38, %rax
mulq %rbp
addq %rax, %r13
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %rdx
imulq $19, %rdx, %rdx
andq %r9, %r13
movq %rdx, %r9
movq $38, %rax
mulq %r14
xorq %r14, %r14
addq %rax, %r10
movq $38, %rax
adcq %rdx, %r14
mulq %r15
xorq %r15, %r15
addq %rax, %r11
movq $38, %rax
adcq %rdx, %r15
mulq %rbx
xorq %rbx, %rbx
addq %rax, %r12
adcq %rdx, %rbx
addq %r9, %r10
adcq %r14, %r11
adcq %r15, %r12
adcq %rbx, %r13
# Store
# Double
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %rdi, %rsi
addq $0x40, %rsi
addq $0x60, %rdi
# Add-Sub
# Add
movq %r10, %r14
addq (%rdi), %r10
movq %r11, %r15
adcq 8(%rdi), %r11
movq %r12, %rbx
adcq 16(%rdi), %r12
movq %r13, %rbp
adcq 24(%rdi), %r13
movq $0x00, %r9
adcq $0x00, %r9
shldq $0x01, %r13, %r9
imulq $19, %r9
btr $63, %r13
# Sub modulus (if overflow)
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rdi), %r14
sbbq 8(%rdi), %r15
sbbq 16(%rdi), %rbx
sbbq 24(%rdi), %rbp
sbbq %r9, %r9
shldq $0x01, %rbp, %r9
imulq $-19, %r9
btr $63, %rbp
# Add modulus (if underflow)
subq %r9, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_sub_x64,.-ge_sub_x64
#endif /* __APPLE__ */
#ifdef HAVE_ED25519
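/* Square a field element and double: r = 2 * a^2 (mod 2^255 - 19).
 *
 * Schoolbook squaring with the off-diagonal products doubled once,
 * reduction via 2^256 = 38 (mod p), then a one-bit left shift whose
 * overflow above bit 255 is multiplied by 19 and added back.
 */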
#ifndef __APPLE__
.text
.globl fe_sq2_x64
.type fe_sq2_x64,@function
.align 16
fe_sq2_x64:
#else
.section __TEXT,__text
.globl _fe_sq2_x64
.p2align 4
_fe_sq2_x64:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
# Square * 2
# A[0] * A[1]
movq (%rsi), %rax
mulq 8(%rsi)
movq %rax, %r8
movq %rdx, %r9
# A[0] * A[2]
movq (%rsi), %rax
mulq 16(%rsi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[0] * A[3]
movq (%rsi), %rax
mulq 24(%rsi)
xorq %r11, %r11
addq %rax, %r10
adcq %rdx, %r11
# A[1] * A[2]
movq 8(%rsi), %rax
mulq 16(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[1] * A[3]
movq 8(%rsi), %rax
mulq 24(%rsi)
addq %rax, %r11
adcq %rdx, %r12
# A[2] * A[3]
movq 16(%rsi), %rax
mulq 24(%rsi)
xorq %r13, %r13
addq %rax, %r12
adcq %rdx, %r13
# Double
xorq %r14, %r14
addq %r8, %r8
adcq %r9, %r9
adcq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
adcq $0x00, %r14
# A[0] * A[0]
movq (%rsi), %rax
mulq %rax
movq %rax, %rcx
movq %rdx, %r15
# A[1] * A[1]
movq 8(%rsi), %rax
mulq %rax
addq %r15, %r8
adcq %rax, %r9
adcq $0x00, %rdx
movq %rdx, %r15
# A[2] * A[2]
movq 16(%rsi), %rax
mulq %rax
addq %r15, %r10
adcq %rax, %r11
adcq $0x00, %rdx
movq %rdx, %r15
# A[3] * A[3]
movq 24(%rsi), %rax
mulq %rax
addq %rax, %r13
adcq %rdx, %r14
addq %r15, %r12
adcq $0x00, %r13
adcq $0x00, %r14
movq $38, %rax
mulq %r14
addq %rax, %r10
adcq $0x00, %rdx
movq $0x7fffffffffffffff, %r15
shldq $0x01, %r10, %rdx
imulq $19, %rdx, %rdx
andq %r15, %r10
movq %rdx, %r15
movq $38, %rax
mulq %r11
xorq %r11, %r11
addq %rax, %rcx
movq $38, %rax
adcq %rdx, %r11
mulq %r12
xorq %r12, %r12
addq %rax, %r8
movq $38, %rax
adcq %rdx, %r12
mulq %r13
xorq %r13, %r13
addq %rax, %r9
adcq %rdx, %r13
addq %r15, %rcx
adcq %r11, %r8
adcq %r12, %r9
adcq %r13, %r10
movq %r10, %rax
shldq $0x01, %r9, %r10
shldq $0x01, %r8, %r9
shldq $0x01, %rcx, %r8
shlq $0x01, %rcx
movq $0x7fffffffffffffff, %r15
shrq $62, %rax
andq %r15, %r10
imulq $19, %rax, %rax
addq %rax, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
adcq $0x00, %r10
# Store
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_sq2_x64,.-fe_sq2_x64
#endif /* __APPLE__ */
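/* Reduce a 512-bit value modulo the Ed25519 group order
 * L = 2^252 + 27742317777372353535851937790883648493, in place.
 *
 * The constants 0xeb2106215d086329 and 0xa7ed9ce5a30a2c13 are the two
 * 64-bit words of 2^128 - (L - 2^252); since 2^252 = -(L - 2^252)
 * (mod L), the words above bit 252 can be folded down by multiplying
 * with them. The order is conditionally subtracted at the end to
 * bring the result into range.
 */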
#ifndef __APPLE__
.text
.globl sc_reduce_x64
.type sc_reduce_x64,@function
.align 16
sc_reduce_x64:
#else
.section __TEXT,__text
.globl _sc_reduce_x64
.p2align 4
_sc_reduce_x64:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq (%rdi), %r8
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq 32(%rdi), %r12
movq 40(%rdi), %r13
movq 48(%rdi), %r14
movq 56(%rdi), %r15
movq %r15, %rcx
movq $0xfffffffffffffff, %rsi
shrq $56, %rcx
shldq $4, %r14, %r15
shldq $4, %r13, %r14
shldq $4, %r12, %r13
shldq $4, %r11, %r12
andq %rsi, %r11
andq %rsi, %r15
# Add order times bits 504..511
subq %rcx, %r14
sbbq $0x00, %r15
movq $0xeb2106215d086329, %rax
mulq %rcx
movq $0x00, %rsi
addq %rax, %r13
movq $0xa7ed9ce5a30a2c13, %rax
adcq %rdx, %rsi
mulq %rcx
addq %rax, %r12
adcq %rdx, %r13
adcq %rsi, %r14
adcq $0x00, %r15
# Sub product of top 4 words and order
movq $0xa7ed9ce5a30a2c13, %rcx
movq %r12, %rax
mulq %rcx
movq $0x00, %rbp
addq %rax, %r8
adcq %rdx, %rbp
movq %r13, %rax
mulq %rcx
movq $0x00, %rsi
addq %rax, %r9
adcq %rdx, %rsi
movq %r14, %rax
mulq %rcx
addq %rbp, %r9
adcq %rax, %r10
adcq %rdx, %r11
movq $0x00, %rbx
adcq $0x00, %rbx
movq %r15, %rax
mulq %rcx
addq %rsi, %r10
adcq %rax, %r11
adcq %rdx, %rbx
movq $0xeb2106215d086329, %rcx
movq %r12, %rax
mulq %rcx
movq $0x00, %rbp
addq %rax, %r9
adcq %rdx, %rbp
movq %r13, %rax
mulq %rcx
movq $0x00, %rsi
addq %rax, %r10
adcq %rdx, %rsi
movq %r14, %rax
mulq %rcx
addq %rbp, %r10
adcq %rax, %r11
adcq %rdx, %rbx
movq $0x00, %rbp
adcq $0x00, %rbp
movq %r15, %rax
mulq %rcx
addq %rsi, %r11
adcq %rax, %rbx
adcq %rdx, %rbp
subq %r12, %r10
movq %rbx, %r12
sbbq %r13, %r11
movq %rbp, %r13
sbbq %r14, %r12
sbbq %r15, %r13
movq %r13, %rcx
sarq $57, %rcx
# Conditionally subtract order starting at bit 125
movq $0xa000000000000000, %rax
movq $0xcb024c634b9eba7d, %rdx
movq $0x29bdf3bd45ef39a, %rbx
movq $0x200000000000000, %rbp
andq %rcx, %rax
andq %rcx, %rdx
andq %rcx, %rbx
andq %rcx, %rbp
addq %rax, %r9
adcq %rdx, %r10
adcq %rbx, %r11
adcq $0x00, %r12
adcq %rbp, %r13
# Move bits 252..376 into their own registers
movq $0xfffffffffffffff, %rcx
shldq $4, %r12, %r13
shldq $4, %r11, %r12
andq %rcx, %r11
# Sub product of top 2 words and order
# * -5812631a5cf5d3ed
movq $0xa7ed9ce5a30a2c13, %rcx
movq %r12, %rax
mulq %rcx
movq $0x00, %rbx
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %rbx
movq %r13, %rax
mulq %rcx
addq %rax, %r9
adcq %rdx, %rbx
# * -14def9dea2f79cd7
movq $0xeb2106215d086329, %rcx
movq %r12, %rax
mulq %rcx
movq $0x00, %rbp
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %rbp
movq %r13, %rax
mulq %rcx
addq %rax, %r10
adcq %rdx, %rbp
# Add the overflow words back in at bit 2 * 64
movq $0xfffffffffffffff, %rsi
andq %rsi, %r11
addq %rbx, %r10
adcq %rbp, %r11
# Subtract the top two words at bit 2 * 64
subq %r12, %r10
sbbq %r13, %r11
sbbq %rsi, %rsi
# Conditional sub order
movq $0x5812631a5cf5d3ed, %rax
movq $0x14def9dea2f79cd6, %rdx
movq $0x1000000000000000, %rbx
andq %rsi, %rax
andq %rsi, %rdx
andq %rsi, %rbx
addq %rax, %r8
movq $0xfffffffffffffff, %rax
adcq %rdx, %r9
adcq $0x00, %r10
adcq %rbx, %r11
andq %rax, %r11
# Store result
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sc_reduce_x64,.-sc_reduce_x64
#endif /* __APPLE__ */
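/* Compute r = (a * b + c) modulo the Ed25519 group order L.
 *
 * 4x4 schoolbook multiply of a and b, addition of c, then the same
 * fold-and-conditional-subtract reduction as sc_reduce_x64.
 */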
#ifndef __APPLE__
.text
.globl sc_muladd_x64
.type sc_muladd_x64,@function
.align 16
sc_muladd_x64:
#else
.section __TEXT,__text
.globl _sc_muladd_x64
.p2align 4
_sc_muladd_x64:
#endif /* __APPLE__ */
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
movq %rdx, %rbp
# Multiply
# A[0] * B[0]
movq (%rbp), %rax
mulq (%rsi)
movq %rax, %r8
movq %rdx, %r9
# A[0] * B[1]
movq 8(%rbp), %rax
mulq (%rsi)
xorq %r10, %r10
addq %rax, %r9
adcq %rdx, %r10
# A[1] * B[0]
movq (%rbp), %rax
mulq 8(%rsi)
xorq %r11, %r11
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %r11
# A[0] * B[2]
movq 16(%rbp), %rax
mulq (%rsi)
addq %rax, %r10
adcq %rdx, %r11
# A[1] * B[1]
movq 8(%rbp), %rax
mulq 8(%rsi)
xorq %r12, %r12
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[2] * B[0]
movq (%rbp), %rax
mulq 16(%rsi)
addq %rax, %r10
adcq %rdx, %r11
adcq $0x00, %r12
# A[0] * B[3]
movq 24(%rbp), %rax
mulq (%rsi)
xorq %r13, %r13
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[2]
movq 16(%rbp), %rax
mulq 8(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[2] * B[1]
movq 8(%rbp), %rax
mulq 16(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[3] * B[0]
movq (%rbp), %rax
mulq 24(%rsi)
addq %rax, %r11
adcq %rdx, %r12
adcq $0x00, %r13
# A[1] * B[3]
movq 24(%rbp), %rax
mulq 8(%rsi)
xorq %r14, %r14
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[2]
movq 16(%rbp), %rax
mulq 16(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[3] * B[1]
movq 8(%rbp), %rax
mulq 24(%rsi)
addq %rax, %r12
adcq %rdx, %r13
adcq $0x00, %r14
# A[2] * B[3]
movq 24(%rbp), %rax
mulq 16(%rsi)
xorq %r15, %r15
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[2]
movq 16(%rbp), %rax
mulq 24(%rsi)
addq %rax, %r13
adcq %rdx, %r14
adcq $0x00, %r15
# A[3] * B[3]
movq 24(%rbp), %rax
mulq 24(%rsi)
addq %rax, %r14
adcq %rdx, %r15
# Add c to a * b
addq (%rcx), %r8
adcq 8(%rcx), %r9
adcq 16(%rcx), %r10
adcq 24(%rcx), %r11
adcq $0x00, %r12
adcq $0x00, %r13
adcq $0x00, %r14
adcq $0x00, %r15
movq %r15, %rbx
movq $0xfffffffffffffff, %rcx
shrq $56, %rbx
shldq $4, %r14, %r15
shldq $4, %r13, %r14
shldq $4, %r12, %r13
shldq $4, %r11, %r12
andq %rcx, %r11
andq %rcx, %r15
# Add order times bits 504..507
subq %rbx, %r14
sbbq $0x00, %r15
movq $0xeb2106215d086329, %rax
mulq %rbx
movq $0x00, %rcx
addq %rax, %r13
movq $0xa7ed9ce5a30a2c13, %rax
adcq %rdx, %rcx
mulq %rbx
addq %rax, %r12
adcq %rdx, %r13
adcq %rcx, %r14
adcq $0x00, %r15
# Sub product of top 4 words and order
movq $0xa7ed9ce5a30a2c13, %rbx
movq %r12, %rax
mulq %rbx
movq $0x00, %rbp
addq %rax, %r8
adcq %rdx, %rbp
movq %r13, %rax
mulq %rbx
movq $0x00, %rcx
addq %rax, %r9
adcq %rdx, %rcx
movq %r14, %rax
mulq %rbx
addq %rbp, %r9
adcq %rax, %r10
adcq %rdx, %r11
movq $0x00, %rsi
adcq $0x00, %rsi
movq %r15, %rax
mulq %rbx
addq %rcx, %r10
adcq %rax, %r11
adcq %rdx, %rsi
movq $0xeb2106215d086329, %rbx
movq %r12, %rax
mulq %rbx
movq $0x00, %rbp
addq %rax, %r9
adcq %rdx, %rbp
movq %r13, %rax
mulq %rbx
movq $0x00, %rcx
addq %rax, %r10
adcq %rdx, %rcx
movq %r14, %rax
mulq %rbx
addq %rbp, %r10
adcq %rax, %r11
adcq %rdx, %rsi
movq $0x00, %rbp
adcq $0x00, %rbp
movq %r15, %rax
mulq %rbx
addq %rcx, %r11
adcq %rax, %rsi
adcq %rdx, %rbp
subq %r12, %r10
movq %rsi, %r12
sbbq %r13, %r11
movq %rbp, %r13
sbbq %r14, %r12
sbbq %r15, %r13
movq %r13, %rbx
sarq $57, %rbx
# Conditionally subtract order starting at bit 125
movq $0xa000000000000000, %rax
movq $0xcb024c634b9eba7d, %rdx
movq $0x29bdf3bd45ef39a, %rsi
movq $0x200000000000000, %rbp
andq %rbx, %rax
andq %rbx, %rdx
andq %rbx, %rsi
andq %rbx, %rbp
addq %rax, %r9
adcq %rdx, %r10
adcq %rsi, %r11
adcq $0x00, %r12
adcq %rbp, %r13
# Move bits 252..376 into their own registers
movq $0xfffffffffffffff, %rbx
shldq $4, %r12, %r13
shldq $4, %r11, %r12
andq %rbx, %r11
# Sub product of top 2 words and order
# * -5812631a5cf5d3ed
movq $0xa7ed9ce5a30a2c13, %rbx
movq %r12, %rax
mulq %rbx
movq $0x00, %rsi
addq %rax, %r8
adcq %rdx, %r9
adcq $0x00, %rsi
movq %r13, %rax
mulq %rbx
addq %rax, %r9
adcq %rdx, %rsi
# * -14def9dea2f79cd7
movq $0xeb2106215d086329, %rbx
movq %r12, %rax
mulq %rbx
movq $0x00, %rbp
addq %rax, %r9
adcq %rdx, %r10
adcq $0x00, %rbp
movq %r13, %rax
mulq %rbx
addq %rax, %r10
adcq %rdx, %rbp
# Add the overflow words back in at bit 2 * 64
movq $0xfffffffffffffff, %rcx
andq %rcx, %r11
addq %rsi, %r10
adcq %rbp, %r11
# Subtract the top two words at bit 2 * 64
subq %r12, %r10
sbbq %r13, %r11
sbbq %rcx, %rcx
# Conditional sub order
movq $0x5812631a5cf5d3ed, %rax
movq $0x14def9dea2f79cd6, %rdx
movq $0x1000000000000000, %rsi
andq %rcx, %rax
andq %rcx, %rdx
andq %rcx, %rsi
addq %rax, %r8
movq $0xfffffffffffffff, %rax
adcq %rdx, %r9
adcq $0x00, %r10
adcq %rsi, %r11
andq %rax, %r11
# Store result
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
repz retq
#ifndef __APPLE__
.size sc_muladd_x64,.-sc_muladd_x64
#endif /* __APPLE__ */
/* Non-constant time modular inversion modulo 2^255 - 19.
 *
 * @param [out] r Modular inverse of a.
 * @param [in] a Number to invert.
 * @return MP_OKAY on success.
 */
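/* Binary extended GCD: the first pass reduces (u, v) = (p, a) with
 * halvings and subtractions, recording each step as one byte in the
 * 0x201-byte stack buffer; the second pass replays those steps on the
 * Bezout coefficients modulo p (odd values are made even by adding p
 * before each halving) to produce the inverse.
 */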
#ifndef __APPLE__
.text
.globl fe_invert_nct_x64
.type fe_invert_nct_x64,@function
.align 16
fe_invert_nct_x64:
#else
.section __TEXT,__text
.globl _fe_invert_nct_x64
.p2align 4
_fe_invert_nct_x64:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
subq $0x201, %rsp
movq $-19, %rcx
movq $-1, %r8
movq $-1, %r9
movq $0x7fffffffffffffff, %r10
movq (%rsi), %r11
movq 8(%rsi), %r12
movq 16(%rsi), %r13
movq 24(%rsi), %r14
movq $0x00, %r15
testb $0x01, %r11b
jnz L_fe_invert_nct_v_even_end
L_fe_invert_nct_v_even_start:
shrdq $0x01, %r12, %r11
shrdq $0x01, %r13, %r12
shrdq $0x01, %r14, %r13
shrq $0x01, %r14
movb $0x01, (%rsp,%r15,1)
incq %r15
testb $0x01, %r11b
jz L_fe_invert_nct_v_even_start
L_fe_invert_nct_v_even_end:
L_fe_invert_nct_uv_start:
cmpq %r14, %r10
jb L_fe_invert_nct_uv_v
ja L_fe_invert_nct_uv_u
cmpq %r13, %r9
jb L_fe_invert_nct_uv_v
ja L_fe_invert_nct_uv_u
cmpq %r12, %r8
jb L_fe_invert_nct_uv_v
ja L_fe_invert_nct_uv_u
cmpq %r11, %rcx
jb L_fe_invert_nct_uv_v
L_fe_invert_nct_uv_u:
movb $2, (%rsp,%r15,1)
incq %r15
subq %r11, %rcx
sbbq %r12, %r8
sbbq %r13, %r9
sbbq %r14, %r10
shrdq $0x01, %r8, %rcx
shrdq $0x01, %r9, %r8
shrdq $0x01, %r10, %r9
shrq $0x01, %r10
testb $0x01, %cl
jnz L_fe_invert_nct_usubv_even_end
L_fe_invert_nct_usubv_even_start:
shrdq $0x01, %r8, %rcx
shrdq $0x01, %r9, %r8
shrdq $0x01, %r10, %r9
shrq $0x01, %r10
movb $0x00, (%rsp,%r15,1)
incq %r15
testb $0x01, %cl
jz L_fe_invert_nct_usubv_even_start
L_fe_invert_nct_usubv_even_end:
cmpq $0x01, %rcx
jne L_fe_invert_nct_uv_start
movq %r8, %rdx
orq %r9, %rdx
jne L_fe_invert_nct_uv_start
orq %r10, %rdx
jne L_fe_invert_nct_uv_start
movb $0x01, %al
jmp L_fe_invert_nct_uv_end
L_fe_invert_nct_uv_v:
movb $3, (%rsp,%r15,1)
incq %r15
subq %rcx, %r11
sbbq %r8, %r12
sbbq %r9, %r13
sbbq %r10, %r14
shrdq $0x01, %r12, %r11
shrdq $0x01, %r13, %r12
shrdq $0x01, %r14, %r13
shrq $0x01, %r14
testb $0x01, %r11b
jnz L_fe_invert_nct_vsubu_even_end
L_fe_invert_nct_vsubu_even_start:
shrdq $0x01, %r12, %r11
shrdq $0x01, %r13, %r12
shrdq $0x01, %r14, %r13
shrq $0x01, %r14
movb $0x01, (%rsp,%r15,1)
incq %r15
testb $0x01, %r11b
jz L_fe_invert_nct_vsubu_even_start
L_fe_invert_nct_vsubu_even_end:
cmpq $0x01, %r11
jne L_fe_invert_nct_uv_start
movq %r12, %rdx
orq %r13, %rdx
jne L_fe_invert_nct_uv_start
orq %r14, %rdx
jne L_fe_invert_nct_uv_start
movb $0x00, %al
L_fe_invert_nct_uv_end:
movq $-19, %rcx
movq $-1, %r8
movq $-1, %r9
movq $0x7fffffffffffffff, %r10
movq $0x01, %r11
xorq %r12, %r12
xorq %r13, %r13
xorq %r14, %r14
movb $7, (%rsp,%r15,1)
movb (%rsp), %dl
movq $0x01, %r15
cmpb $0x01, %dl
je L_fe_invert_nct_op_div2_d
jl L_fe_invert_nct_op_div2_b
cmpb $3, %dl
je L_fe_invert_nct_op_d_sub_b
jl L_fe_invert_nct_op_b_sub_d
jmp L_fe_invert_nct_op_end
L_fe_invert_nct_op_b_sub_d:
subq %r11, %rcx
sbbq %r12, %r8
sbbq %r13, %r9
sbbq %r14, %r10
jnc L_fe_invert_nct_op_div2_b
movq $-1, %rdx
addq $-19, %rcx
adcq %rdx, %r8
adcq %rdx, %r9
movq $0x7fffffffffffffff, %rdx
adcq %rdx, %r10
L_fe_invert_nct_op_div2_b:
testb $0x01, %cl
jz L_fe_invert_nct_op_div2_b_mod
addq $-19, %rcx
movq $-1, %rdx
adcq %rdx, %r8
adcq %rdx, %r9
movq $0x7fffffffffffffff, %rdx
adcq %rdx, %r10
L_fe_invert_nct_op_div2_b_mod:
shrdq $0x01, %r8, %rcx
shrdq $0x01, %r9, %r8
shrdq $0x01, %r10, %r9
shrq $0x01, %r10
movb (%rsp,%r15,1), %dl
incq %r15
cmpb $0x01, %dl
je L_fe_invert_nct_op_div2_d
jl L_fe_invert_nct_op_div2_b
cmpb $3, %dl
je L_fe_invert_nct_op_d_sub_b
jl L_fe_invert_nct_op_b_sub_d
jmp L_fe_invert_nct_op_end
L_fe_invert_nct_op_d_sub_b:
subq %rcx, %r11
sbbq %r8, %r12
sbbq %r9, %r13
sbbq %r10, %r14
jnc L_fe_invert_nct_op_div2_d
movq $-1, %rdx
addq $-19, %r11
adcq %rdx, %r12
adcq %rdx, %r13
movq $0x7fffffffffffffff, %rdx
adcq %rdx, %r14
L_fe_invert_nct_op_div2_d:
testb $0x01, %r11b
jz L_fe_invert_nct_op_div2_d_mod
addq $-19, %r11
movq $-1, %rdx
adcq %rdx, %r12
adcq %rdx, %r13
movq $0x7fffffffffffffff, %rdx
adcq %rdx, %r14
L_fe_invert_nct_op_div2_d_mod:
shrdq $0x01, %r12, %r11
shrdq $0x01, %r13, %r12
shrdq $0x01, %r14, %r13
shrq $0x01, %r14
movb (%rsp,%r15,1), %dl
incq %r15
cmpb $0x01, %dl
je L_fe_invert_nct_op_div2_d
jl L_fe_invert_nct_op_div2_b
cmpb $3, %dl
je L_fe_invert_nct_op_d_sub_b
jl L_fe_invert_nct_op_b_sub_d
L_fe_invert_nct_op_end:
cmpb $0x01, %al
jne L_fe_invert_nct_store_d
movq %rcx, (%rdi)
movq %r8, 8(%rdi)
movq %r9, 16(%rdi)
movq %r10, 24(%rdi)
jmp L_fe_invert_nct_store_end
L_fe_invert_nct_store_d:
movq %r11, (%rdi)
movq %r12, 8(%rdi)
movq %r13, 16(%rdi)
movq %r14, 24(%rdi)
L_fe_invert_nct_store_end:
addq $0x201, %rsp
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_invert_nct_x64,.-fe_invert_nct_x64
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#ifdef HAVE_INTEL_AVX2
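/* Constant-time precomputed-table lookup (AVX2).
 *
 * b is in [-8, 8]: entry |b| of the 8 x 96-byte table is gathered with
 * AVX2 compare masks (index 0 selects the neutral entry (1, 1, 0));
 * when b < 0 the first two field elements are swapped by a masked XOR
 * and the third is negated modulo p, all without branching on b.
 */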
#ifndef __APPLE__
.text
.globl fe_cmov_table_avx2
.type fe_cmov_table_avx2,@function
.align 16
fe_cmov_table_avx2:
#else
.section __TEXT,__text
.globl _fe_cmov_table_avx2
.p2align 4
_fe_cmov_table_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
movq %rdx, %rcx
xorq %rbx, %rbx
movsbq %cl, %rax
cdq
xorb %dl, %al
subb %dl, %al
movb %al, %bl
movd %ebx, %xmm7
movq $0x01, %rbx
movd %rbx, %xmm9
vmovdqa %ymm9, %ymm3
vmovdqa %ymm9, %ymm4
vpxor %ymm8, %ymm8, %ymm8
vpermd %ymm7, %ymm8, %ymm7
vpermd %ymm9, %ymm8, %ymm9
vpxor %ymm0, %ymm0, %ymm0
vpxor %ymm1, %ymm1, %ymm1
vpxor %ymm2, %ymm2, %ymm2
vpcmpeqd %ymm7, %ymm8, %ymm6
vpxor %ymm5, %ymm5, %ymm5
vpand %ymm6, %ymm3, %ymm3
vpand %ymm6, %ymm4, %ymm4
vmovdqa %ymm9, %ymm8
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd (%rsi), %ymm0
vmovupd 32(%rsi), %ymm1
vmovupd 64(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 96(%rsi), %ymm0
vmovupd 128(%rsi), %ymm1
vmovupd 160(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 192(%rsi), %ymm0
vmovupd 224(%rsi), %ymm1
vmovupd 256(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 288(%rsi), %ymm0
vmovupd 320(%rsi), %ymm1
vmovupd 352(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 384(%rsi), %ymm0
vmovupd 416(%rsi), %ymm1
vmovupd 448(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 480(%rsi), %ymm0
vmovupd 512(%rsi), %ymm1
vmovupd 544(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 576(%rsi), %ymm0
vmovupd 608(%rsi), %ymm1
vmovupd 640(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
vpcmpeqd %ymm7, %ymm8, %ymm6
vpaddd %ymm9, %ymm8, %ymm8
vmovupd 672(%rsi), %ymm0
vmovupd 704(%rsi), %ymm1
vmovupd 736(%rsi), %ymm2
vpand %ymm6, %ymm0, %ymm0
vpand %ymm6, %ymm1, %ymm1
vpand %ymm6, %ymm2, %ymm2
vpor %ymm0, %ymm3, %ymm3
vpor %ymm1, %ymm4, %ymm4
vpor %ymm2, %ymm5, %ymm5
movsbq %cl, %rax
sarq $63, %rax
vmovd %eax, %xmm6
vpxor %ymm8, %ymm8, %ymm8
vpermd %ymm6, %ymm8, %ymm6
vpxor %ymm4, %ymm3, %ymm8
vpand %ymm6, %ymm8, %ymm8
vpxor %ymm8, %ymm3, %ymm3
vpxor %ymm8, %ymm4, %ymm4
vmovupd %ymm3, (%rdi)
vmovupd %ymm4, 32(%rdi)
vmovupd %ymm5, 64(%rdi)
movq 64(%rdi), %r8
movq 72(%rdi), %r9
movq 80(%rdi), %r10
movq 88(%rdi), %r11
movq $-19, %r12
movq $-1, %r13
movq $-1, %r14
movq $0x7fffffffffffffff, %r15
subq %r8, %r12
sbbq %r9, %r13
sbbq %r10, %r14
sbbq %r11, %r15
cmpb $0x00, %cl
cmovlq %r12, %r8
cmovlq %r13, %r9
cmovlq %r14, %r10
cmovlq %r15, %r11
movq %r8, 64(%rdi)
movq %r9, 72(%rdi)
movq %r10, 80(%rdi)
movq %r11, 88(%rdi)
vzeroupper
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_cmov_table_avx2,.-fe_cmov_table_avx2
#endif /* __APPLE__ */
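/* Multiply two field elements: r = a * b (mod 2^255 - 19) (BMI2/ADX).
 *
 * mulx leaves the flags untouched so adcx/adox can run two independent
 * carry chains through the 4x4 product; the high half is folded in via
 * 2^256 = 38 (mod p) and the final carry via * 19.
 */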
#ifndef __APPLE__
.text
.globl fe_mul_avx2
.type fe_mul_avx2,@function
.align 16
fe_mul_avx2:
#else
.section __TEXT,__text
.globl _fe_mul_avx2
.p2align 4
_fe_mul_avx2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
movq %rdx, %rbp
movq (%rsi), %rbx
# Multiply
# A[0] * B[0]
movq (%rbp), %rdx
mulxq %rbx, %r8, %r9
# A[2] * B[0]
mulxq 16(%rsi), %r10, %r11
# A[1] * B[0]
mulxq 8(%rsi), %rax, %rcx
xorq %r15, %r15
adcxq %rax, %r9
# A[3] * B[1]
movq 8(%rbp), %rdx
mulxq 24(%rsi), %r12, %r13
adcxq %rcx, %r10
# A[0] * B[1]
mulxq %rbx, %rax, %rcx
adoxq %rax, %r9
# A[2] * B[1]
mulxq 16(%rsi), %rax, %r14
adoxq %rcx, %r10
adcxq %rax, %r11
# A[1] * B[2]
movq 16(%rbp), %rdx
mulxq 8(%rsi), %rax, %rcx
adcxq %r14, %r12
adoxq %rax, %r11
adcxq %r15, %r13
adoxq %rcx, %r12
# A[0] * B[2]
mulxq %rbx, %rax, %rcx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rax, %r10
# A[1] * B[1]
movq 8(%rbp), %rdx
mulxq 8(%rsi), %rdx, %rax
adcxq %rcx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 24(%rbp), %rdx
adoxq %rax, %r11
mulxq 8(%rsi), %rax, %rcx
adcxq %rax, %r12
# A[2] * B[2]
movq 16(%rbp), %rdx
mulxq 16(%rsi), %rdx, %rax
adcxq %rcx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 24(%rbp), %rdx
adoxq %rax, %r13
mulxq 24(%rsi), %rax, %rcx
adoxq %r15, %r14
adcxq %rax, %r14
# A[0] * B[3]
mulxq %rbx, %rdx, %rax
adcxq %rcx, %r15
xorq %rcx, %rcx
adcxq %rdx, %r11
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rax, %r12
mulxq (%rbp), %rdx, %rax
adoxq %rdx, %r11
adoxq %rax, %r12
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rbp), %rdx, %rax
adcxq %rdx, %r13
# A[2] * B[3]
movq 24(%rbp), %rdx
adcxq %rax, %r14
mulxq 16(%rsi), %rax, %rdx
adcxq %rcx, %r15
adoxq %rax, %r13
adoxq %rdx, %r14
adoxq %rcx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rax
addq %r15, %r11
adcq $0x00, %rax
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rax
imulq $19, %rax, %rax
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rax, %r8
mulxq %r12, %rax, %r12
adcxq %rax, %r8
adoxq %r12, %r9
mulxq %r13, %rax, %r13
adcxq %rax, %r9
adoxq %r13, %r10
mulxq %r14, %rax, %r14
adcxq %rax, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
movq $0x7fffffffffffffff, %rcx
movq %r11, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rcx, %r11
addq %rdx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size fe_mul_avx2,.-fe_mul_avx2
#endif /* __APPLE__ */
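/* Square a field element: r = a^2 (mod 2^255 - 19) (BMI2/ADX).
 *
 * Off-diagonal products are computed once and doubled by adding the
 * accumulators to themselves inside the carry chain; the diagonal
 * squares are then added and the result reduced as in fe_mul_avx2.
 */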
#ifndef __APPLE__
.text
.globl fe_sq_avx2
.type fe_sq_avx2,@function
.align 16
fe_sq_avx2:
#else
.section __TEXT,__text
.globl _fe_sq_avx2
.p2align 4
_fe_sq_avx2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
# Square
movq (%rsi), %rdx
movq 8(%rsi), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 24(%rsi), %r11, %r12
# A[2] * A[1]
movq 16(%rsi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 24(%rsi), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rsi), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 16(%rsi), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 24(%rsi), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
movq $0x7fffffffffffffff, %rcx
movq %r11, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rcx, %r11
addq %rdx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size fe_sq_avx2,.-fe_sq_avx2
#endif /* __APPLE__ */
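/* Square a field element n times: r = a^(2^n) (mod 2^255 - 19).
 *
 * Each pass of the loop reads from a and writes to r, so r and a must
 * alias for n > 1; the callers in this file always pass r == a. Only
 * the low byte of n is used (decb %bpl), so 1 <= n <= 255.
 */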
#ifndef __APPLE__
.text
.globl fe_sq_n_avx2
.type fe_sq_n_avx2,@function
.align 16
fe_sq_n_avx2:
#else
.section __TEXT,__text
.globl _fe_sq_n_avx2
.p2align 4
_fe_sq_n_avx2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
movq %rdx, %rbp
L_fe_sq_n_avx2:
# Square
movq (%rsi), %rdx
movq 8(%rsi), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 24(%rsi), %r11, %r12
# A[2] * A[1]
movq 16(%rsi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 24(%rsi), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rsi), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 16(%rsi), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 24(%rsi), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
decb %bpl
jnz L_fe_sq_n_avx2
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size fe_sq_n_avx2,.-fe_sq_n_avx2
#endif /* __APPLE__ */
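/* Multiply a field element by 121666: r = a * 121666 (mod 2^255 - 19).
 *
 * 0x1db42 = 121666 = (486662 + 2) / 4, the curve25519 constant used in
 * the Montgomery ladder step.
 */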
#ifndef __APPLE__
.text
.globl fe_mul121666_avx2
.type fe_mul121666_avx2,@function
.align 16
fe_mul121666_avx2:
#else
.section __TEXT,__text
.globl _fe_mul121666_avx2
.p2align 4
_fe_mul121666_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
movq $0x1db42, %rdx
mulxq (%rsi), %rax, %r13
mulxq 8(%rsi), %rcx, %r12
mulxq 16(%rsi), %r8, %r11
addq %r13, %rcx
mulxq 24(%rsi), %r9, %r10
adcq %r12, %r8
adcq %r11, %r9
adcq $0x00, %r10
shldq $0x01, %r9, %r10
btr $63, %r9
imulq $19, %r10, %r10
addq %r10, %rax
adcq $0x00, %rcx
adcq $0x00, %r8
adcq $0x00, %r9
movq %rax, (%rdi)
movq %rcx, 8(%rdi)
movq %r8, 16(%rdi)
movq %r9, 24(%rdi)
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_mul121666_avx2,.-fe_mul121666_avx2
#endif /* __APPLE__ */
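/* Invert a field element: r = a^(p - 2) (mod p), p = 2^255 - 19.
 *
 * Fermat inversion using the standard curve25519 addition chain of
 * 254 squarings and 11 multiplications, with four 32-byte temporaries
 * on the stack.
 */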
#ifndef __APPLE__
.text
.globl fe_invert_avx2
.type fe_invert_avx2,@function
.align 16
fe_invert_avx2:
#else
.section __TEXT,__text
.globl _fe_invert_avx2
.p2align 4
_fe_invert_avx2:
#endif /* __APPLE__ */
subq $0x90, %rsp
# Invert
movq %rdi, 128(%rsp)
movq %rsi, 136(%rsp)
movq %rsp, %rdi
movq 136(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq 136(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
movq 128(%rsp), %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq 136(%rsp), %rsi
movq 128(%rsp), %rdi
addq $0x90, %rsp
repz retq
#if defined(WOLFSSL_CURVE25519_NOT_USE_ED25519)
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_curve25519_base_avx2_x2:
.quad 0x5cae469cdd684efb, 0x8f3f5ced1e350b5c
.quad 0xd9750c687d157114, 0x20d342d51873f1b7
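/* Multiply the base point (x = 9) of curve25519 by a clamped scalar n.
 *
 * Montgomery ladder over bits 253..0 with branch-free conditional
 * swaps. Clamped scalars always have bit 254 set, so the ladder is
 * seeded directly with the base point and, per its label, the
 * x-coordinate of twice the base point in L_curve25519_base_avx2_x2.
 */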
#ifndef __APPLE__
.text
.globl curve25519_base_avx2
.type curve25519_base_avx2,@function
.align 16
curve25519_base_avx2:
#else
.section __TEXT,__text
.globl _curve25519_base_avx2
.p2align 4
_curve25519_base_avx2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
subq $0xb0, %rsp
movq $0x00, 168(%rsp)
movq %rdi, 160(%rsp)
# Set base point x
movq $9, (%rdi)
movq $0x00, 8(%rdi)
movq $0x00, 16(%rdi)
movq $0x00, 24(%rdi)
# Set one
movq $0x01, (%rsp)
movq $0x00, 8(%rsp)
movq $0x00, 16(%rsp)
movq $0x00, 24(%rsp)
movq 0+L_curve25519_base_avx2_x2(%rip), %r8
movq 8+L_curve25519_base_avx2_x2(%rip), %r9
movq 16+L_curve25519_base_avx2_x2(%rip), %r10
movq 24+L_curve25519_base_avx2_x2(%rip), %r11
# Set one
movq $0x01, 32(%rsp)
movq $0x00, 40(%rsp)
movq $0x00, 48(%rsp)
movq $0x00, 56(%rsp)
movq %r8, 64(%rsp)
movq %r9, 72(%rsp)
movq %r10, 80(%rsp)
movq %r11, 88(%rsp)
movq $0xfd, %rbp
L_curve25519_base_avx2_bits:
movq 168(%rsp), %rax
movq %rbp, %rbx
movq %rbp, %rcx
shrq $6, %rbx
andq $63, %rcx
movq (%rsi,%rbx,8), %rbx
shrq %cl, %rbx
andq $0x01, %rbx
xorq %rbx, %rax
negq %rax
# Conditional Swap
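# rax is all-ones when the current ladder bit differs from the previous
# one, so the masked XORs below exchange the two ladder points without
# a data-dependent branch.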
movq (%rdi), %r8
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq (%rsp), %r12
movq 8(%rsp), %r13
movq 16(%rsp), %r14
movq 24(%rsp), %r15
xorq 64(%rsp), %r8
xorq 72(%rsp), %r9
xorq 80(%rsp), %r10
xorq 88(%rsp), %r11
xorq 32(%rsp), %r12
xorq 40(%rsp), %r13
xorq 48(%rsp), %r14
xorq 56(%rsp), %r15
andq %rax, %r8
andq %rax, %r9
andq %rax, %r10
andq %rax, %r11
andq %rax, %r12
andq %rax, %r13
andq %rax, %r14
andq %rax, %r15
xorq %r8, (%rdi)
xorq %r9, 8(%rdi)
xorq %r10, 16(%rdi)
xorq %r11, 24(%rdi)
xorq %r12, (%rsp)
xorq %r13, 8(%rsp)
xorq %r14, 16(%rsp)
xorq %r15, 24(%rsp)
xorq %r8, 64(%rsp)
xorq %r9, 72(%rsp)
xorq %r10, 80(%rsp)
xorq %r11, 88(%rsp)
xorq %r12, 32(%rsp)
xorq %r13, 40(%rsp)
xorq %r14, 48(%rsp)
xorq %r15, 56(%rsp)
movq %rbx, 168(%rsp)
# Add-Sub
# Add
movq (%rdi), %r8
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq %r8, %r12
addq (%rsp), %r8
movq %r9, %r13
adcq 8(%rsp), %r9
movq %r10, %r14
adcq 16(%rsp), %r10
movq %r11, %r15
adcq 24(%rsp), %r11
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r11, %rbx
imulq $19, %rbx
btr $63, %r11
# Sub modulus (if overflow)
addq %rbx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq (%rsp), %r12
sbbq 8(%rsp), %r13
sbbq 16(%rsp), %r14
sbbq 24(%rsp), %r15
sbbq %rbx, %rbx
shldq $0x01, %r15, %rbx
imulq $-19, %rbx
btr $63, %r15
# Add modulus (if underflow)
subq %rbx, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
movq %r12, 128(%rsp)
movq %r13, 136(%rsp)
movq %r14, 144(%rsp)
movq %r15, 152(%rsp)
# Add-Sub
# Add
movq 64(%rsp), %r8
movq 72(%rsp), %r9
movq 80(%rsp), %r10
movq 88(%rsp), %r11
movq %r8, %r12
addq 32(%rsp), %r8
movq %r9, %r13
adcq 40(%rsp), %r9
movq %r10, %r14
adcq 48(%rsp), %r10
movq %r11, %r15
adcq 56(%rsp), %r11
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r11, %rbx
imulq $19, %rbx
btr $63, %r11
# Sub modulus (if overflow)
addq %rbx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq 32(%rsp), %r12
sbbq 40(%rsp), %r13
sbbq 48(%rsp), %r14
sbbq 56(%rsp), %r15
sbbq %rbx, %rbx
shldq $0x01, %r15, %rbx
imulq $-19, %rbx
btr $63, %r15
# Add modulus (if underflow)
subq %rbx, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
movq %r12, 96(%rsp)
movq %r13, 104(%rsp)
movq %r14, 112(%rsp)
movq %r15, 120(%rsp)
movq 32(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 128(%rsp), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 48(%rsp), %r10, %r11
# A[1] * B[0]
mulxq 40(%rsp), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 136(%rsp), %rdx
mulxq 56(%rsp), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 48(%rsp), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 144(%rsp), %rdx
mulxq 40(%rsp), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 136(%rsp), %rdx
mulxq 40(%rsp), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r11
mulxq 40(%rsp), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 144(%rsp), %rdx
mulxq 48(%rsp), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r13
mulxq 56(%rsp), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 56(%rsp), %rdx
adcxq %rcx, %r12
mulxq 128(%rsp), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 56(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 152(%rsp), %rdx
adcxq %rcx, %r14
mulxq 48(%rsp), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
# Store
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
movq 96(%rsp), %rax
# Multiply
# A[0] * B[0]
movq (%rdi), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 112(%rsp), %r10, %r11
# A[1] * B[0]
mulxq 104(%rsp), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 8(%rdi), %rdx
mulxq 120(%rsp), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 112(%rsp), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 16(%rdi), %rdx
mulxq 104(%rsp), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 8(%rdi), %rdx
mulxq 104(%rsp), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r11
mulxq 104(%rsp), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 16(%rdi), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r13
mulxq 120(%rsp), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 120(%rsp), %rdx
adcxq %rcx, %r12
mulxq (%rdi), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 120(%rsp), %rdx
mulxq 16(%rdi), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 24(%rdi), %rdx
adcxq %rcx, %r14
mulxq 112(%rsp), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
# Store
movq %r8, (%rsp)
movq %r9, 8(%rsp)
movq %r10, 16(%rsp)
movq %r11, 24(%rsp)
# Square
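# Squaring: each off-diagonal product A[i]*A[j] (i < j) is computed
# once and doubled through the carry chain, with the diagonal squares
# A[i]^2 interleaved; reduction is the same *38 fold as the multiply.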
movq 128(%rsp), %rdx
movq 136(%rsp), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 152(%rsp), %r11, %r12
# A[2] * A[1]
movq 144(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 152(%rsp), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 152(%rsp), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 144(%rsp), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
# Square
movq (%rdi), %rdx
movq 8(%rdi), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 24(%rdi), %r11, %r12
# A[2] * A[1]
movq 16(%rdi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 24(%rdi), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rdi), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 16(%rdi), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
# Add-Sub
# Add
movq (%rsp), %r8
movq 8(%rsp), %r9
movq 16(%rsp), %r10
movq 24(%rsp), %r11
movq %r8, %r12
addq 32(%rsp), %r8
movq %r9, %r13
adcq 40(%rsp), %r9
movq %r10, %r14
adcq 48(%rsp), %r10
movq %r11, %r15
adcq 56(%rsp), %r11
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r11, %rbx
imulq $19, %rbx
btr $63, %r11
# Sub modulus (if overflow)
addq %rbx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq 32(%rsp), %r12
sbbq 40(%rsp), %r13
sbbq 48(%rsp), %r14
sbbq 56(%rsp), %r15
sbbq %rbx, %rbx
shldq $0x01, %r15, %rbx
imulq $-19, %rbx
btr $63, %r15
# Add modulus (if underflow)
subq %rbx, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %r8, 64(%rsp)
movq %r9, 72(%rsp)
movq %r10, 80(%rsp)
movq %r11, 88(%rsp)
movq %r12, 32(%rsp)
movq %r13, 40(%rsp)
movq %r14, 48(%rsp)
movq %r15, 56(%rsp)
movq 128(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 144(%rsp), %r10, %r11
# A[1] * B[0]
mulxq 136(%rsp), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 104(%rsp), %rdx
mulxq 152(%rsp), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 144(%rsp), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 112(%rsp), %rdx
mulxq 136(%rsp), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 104(%rsp), %rdx
mulxq 136(%rsp), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r11
mulxq 136(%rsp), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 112(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r13
mulxq 152(%rsp), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 152(%rsp), %rdx
adcxq %rcx, %r12
mulxq 96(%rsp), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 152(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 120(%rsp), %rdx
adcxq %rcx, %r14
mulxq 144(%rsp), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
# Sub
movq 128(%rsp), %r8
movq 136(%rsp), %r9
movq 144(%rsp), %r10
movq 152(%rsp), %r11
subq 96(%rsp), %r8
sbbq 104(%rsp), %r9
sbbq 112(%rsp), %r10
sbbq 120(%rsp), %r11
sbbq %rbx, %rbx
shldq $0x01, %r11, %rbx
imulq $-19, %rbx
btr $63, %r11
# Add modulus (if underflow)
subq %rbx, %r8
sbbq $0x00, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
movq %r8, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
# Square
movq 32(%rsp), %rdx
movq 40(%rsp), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 56(%rsp), %r11, %r12
# A[2] * A[1]
movq 48(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 56(%rsp), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 56(%rsp), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 48(%rsp), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 56(%rsp), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
# Square
movq 64(%rsp), %rdx
movq 72(%rsp), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 88(%rsp), %r11, %r12
# A[2] * A[1]
movq 80(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 88(%rsp), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 88(%rsp), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 80(%rsp), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 88(%rsp), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, 64(%rsp)
movq %r9, 72(%rsp)
movq %r10, 80(%rsp)
movq %r11, 88(%rsp)
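# Multiply by the curve constant (A+2)/4 = 121666 (0x1db42),
# accumulate, and reduce modulo 2^255-19.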
movq $0x1db42, %rdx
mulxq 128(%rsp), %r8, %r15
mulxq 136(%rsp), %r9, %r14
mulxq 144(%rsp), %r10, %r13
addq %r15, %r9
mulxq 152(%rsp), %r11, %r12
adcq %r14, %r10
adcq %r13, %r11
adcq $0x00, %r12
addq 96(%rsp), %r8
adcq 104(%rsp), %r9
adcq 112(%rsp), %r10
adcq 120(%rsp), %r11
adcq $0x00, %r12
shldq $0x01, %r11, %r12
btr $63, %r11
imulq $19, %r12, %r12
addq %r12, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq %r8, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
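# In the base-point variant x1 = 9, so the ladder's multiplication by
# x1 reduces to a small-constant multiply by 9.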
movq $9, %rdx
mulxq 32(%rsp), %r8, %r15
mulxq 40(%rsp), %r9, %r14
mulxq 48(%rsp), %r10, %r13
addq %r15, %r9
mulxq 56(%rsp), %r11, %r12
adcq %r14, %r10
adcq %r13, %r11
adcq $0x00, %r12
shldq $0x01, %r11, %r12
btr $63, %r11
imulq $19, %r12, %r12
addq %r12, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq %r8, 32(%rsp)
movq %r9, 40(%rsp)
movq %r10, 48(%rsp)
movq %r11, 56(%rsp)
movq 128(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 144(%rsp), %r10, %r11
# A[1] * B[0]
mulxq 136(%rsp), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 104(%rsp), %rdx
mulxq 152(%rsp), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 144(%rsp), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 112(%rsp), %rdx
mulxq 136(%rsp), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 104(%rsp), %rdx
mulxq 136(%rsp), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r11
mulxq 136(%rsp), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 112(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r13
mulxq 152(%rsp), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 152(%rsp), %rdx
adcxq %rcx, %r12
mulxq 96(%rsp), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 152(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 120(%rsp), %rdx
adcxq %rcx, %r14
mulxq 144(%rsp), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
# Store
movq %r8, (%rsp)
movq %r9, 8(%rsp)
movq %r10, 16(%rsp)
movq %r11, 24(%rsp)
decq %rbp
cmpq $3, %rbp
jge L_curve25519_base_avx2_bits
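# Main ladder loop done (bits 253..3); apply the final conditional
# swap using the last saved ladder bit.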
movq 168(%rsp), %rax
negq %rax
# Conditional Swap
movq (%rdi), %r8
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq (%rsp), %r12
movq 8(%rsp), %r13
movq 16(%rsp), %r14
movq 24(%rsp), %r15
xorq 64(%rsp), %r8
xorq 72(%rsp), %r9
xorq 80(%rsp), %r10
xorq 88(%rsp), %r11
xorq 32(%rsp), %r12
xorq 40(%rsp), %r13
xorq 48(%rsp), %r14
xorq 56(%rsp), %r15
andq %rax, %r8
andq %rax, %r9
andq %rax, %r10
andq %rax, %r11
andq %rax, %r12
andq %rax, %r13
andq %rax, %r14
andq %rax, %r15
xorq %r8, (%rdi)
xorq %r9, 8(%rdi)
xorq %r10, 16(%rdi)
xorq %r11, 24(%rdi)
xorq %r12, (%rsp)
xorq %r13, 8(%rsp)
xorq %r14, 16(%rsp)
xorq %r15, 24(%rsp)
xorq %r8, 64(%rsp)
xorq %r9, 72(%rsp)
xorq %r10, 80(%rsp)
xorq %r11, 88(%rsp)
xorq %r12, 32(%rsp)
xorq %r13, 40(%rsp)
xorq %r14, 48(%rsp)
xorq %r15, 56(%rsp)
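# Bits 2..0 of a clamped scalar are zero, so the three remaining steps
# need no swap and only the doubling half of the ladder step: just
# (x2:z2) feeds the final result.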
L_curve25519_base_avx2_last_3:
# Add-Sub
# Add
movq (%rdi), %r8
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq %r8, %r12
addq (%rsp), %r8
movq %r9, %r13
adcq 8(%rsp), %r9
movq %r10, %r14
adcq 16(%rsp), %r10
movq %r11, %r15
adcq 24(%rsp), %r11
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r11, %rbx
imulq $19, %rbx
btr $63, %r11
# Sub modulus (if overflow)
addq %rbx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Sub
subq (%rsp), %r12
sbbq 8(%rsp), %r13
sbbq 16(%rsp), %r14
sbbq 24(%rsp), %r15
sbbq %rbx, %rbx
shldq $0x01, %r15, %rbx
imulq $-19, %rbx
btr $63, %r15
# Add modulus (if underflow)
subq %rbx, %r12
sbbq $0x00, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
movq %r12, 128(%rsp)
movq %r13, 136(%rsp)
movq %r14, 144(%rsp)
movq %r15, 152(%rsp)
# Square
movq 128(%rsp), %rdx
movq 136(%rsp), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 152(%rsp), %r11, %r12
# A[2] * A[1]
movq 144(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 152(%rsp), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 152(%rsp), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 144(%rsp), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
# Square
movq (%rdi), %rdx
movq 8(%rdi), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 24(%rdi), %r11, %r12
# A[2] * A[1]
movq 16(%rdi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 24(%rdi), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rdi), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 16(%rdi), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rbx
addq %r15, %r11
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rbx, %r8
mulxq %r12, %rbx, %r12
adcxq %rbx, %r8
adoxq %r12, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
# Store
movq %r8, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
movq 128(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 144(%rsp), %r10, %r11
# A[1] * B[0]
mulxq 136(%rsp), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 104(%rsp), %rdx
mulxq 152(%rsp), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 144(%rsp), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 112(%rsp), %rdx
mulxq 136(%rsp), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 104(%rsp), %rdx
mulxq 136(%rsp), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r11
mulxq 136(%rsp), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 112(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r13
mulxq 152(%rsp), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 152(%rsp), %rdx
adcxq %rcx, %r12
mulxq 96(%rsp), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 152(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 120(%rsp), %rdx
adcxq %rcx, %r14
mulxq 144(%rsp), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
# Sub
movq 128(%rsp), %r8
movq 136(%rsp), %r9
movq 144(%rsp), %r10
movq 152(%rsp), %r11
subq 96(%rsp), %r8
sbbq 104(%rsp), %r9
sbbq 112(%rsp), %r10
sbbq 120(%rsp), %r11
sbbq %rbx, %rbx
shldq $0x01, %r11, %rbx
imulq $-19, %rbx
btr $63, %r11
# Add modulus (if underflow)
subq %rbx, %r8
sbbq $0x00, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
movq %r8, 128(%rsp)
movq %r9, 136(%rsp)
movq %r10, 144(%rsp)
movq %r11, 152(%rsp)
movq $0x1db42, %rdx
mulxq 128(%rsp), %r8, %r15
mulxq 136(%rsp), %r9, %r14
mulxq 144(%rsp), %r10, %r13
addq %r15, %r9
mulxq 152(%rsp), %r11, %r12
adcq %r14, %r10
adcq %r13, %r11
adcq $0x00, %r12
addq 96(%rsp), %r8
adcq 104(%rsp), %r9
adcq 112(%rsp), %r10
adcq 120(%rsp), %r11
adcq $0x00, %r12
shldq $0x01, %r11, %r12
btr $63, %r11
imulq $19, %r12, %r12
addq %r12, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq %r8, 96(%rsp)
movq %r9, 104(%rsp)
movq %r10, 112(%rsp)
movq %r11, 120(%rsp)
movq 128(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 144(%rsp), %r10, %r11
# A[1] * B[0]
mulxq 136(%rsp), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 104(%rsp), %rdx
mulxq 152(%rsp), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 144(%rsp), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 112(%rsp), %rdx
mulxq 136(%rsp), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 104(%rsp), %rdx
mulxq 136(%rsp), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r11
mulxq 136(%rsp), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 112(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r13
mulxq 152(%rsp), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 152(%rsp), %rdx
adcxq %rcx, %r12
mulxq 96(%rsp), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 152(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 120(%rsp), %rdx
adcxq %rcx, %r14
mulxq 144(%rsp), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
# Store
movq %r8, (%rsp)
movq %r9, 8(%rsp)
movq %r10, 16(%rsp)
movq %r11, 24(%rsp)
decq %rbp
jge L_curve25519_base_avx2_last_3
# Invert
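# Compute 1/z2 = z2^(p-2) = z2^(2^255-21) by Fermat's little theorem,
# via a fixed square-and-multiply chain (no secret-dependent control
# flow).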
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
movq %rsp, %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
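# Reload the output pointer and multiply x2 by 1/z2 (held at (%rsp))
# to produce the affine x-coordinate, fully reduced below.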
movq 160(%rsp), %rdi
movq (%rdi), %rax
# Multiply
# A[0] * B[0]
movq (%rsp), %rdx
mulxq %rax, %r8, %r9
# A[2] * B[0]
mulxq 16(%rdi), %r10, %r11
# A[1] * B[0]
mulxq 8(%rdi), %rcx, %rbx
xorq %r15, %r15
adcxq %rcx, %r9
# A[3] * B[1]
movq 8(%rsp), %rdx
mulxq 24(%rdi), %r12, %r13
adcxq %rbx, %r10
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r9
# A[2] * B[1]
mulxq 16(%rdi), %rcx, %r14
adoxq %rbx, %r10
adcxq %rcx, %r11
# A[1] * B[2]
movq 16(%rsp), %rdx
mulxq 8(%rdi), %rcx, %rbx
adcxq %r14, %r12
adoxq %rcx, %r11
adcxq %r15, %r13
adoxq %rbx, %r12
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %r15, %r13
xorq %r14, %r14
adcxq %rcx, %r10
# A[1] * B[1]
movq 8(%rsp), %rdx
mulxq 8(%rdi), %rdx, %rcx
adcxq %rbx, %r11
adoxq %rdx, %r10
# A[1] * B[3]
movq 24(%rsp), %rdx
adoxq %rcx, %r11
mulxq 8(%rdi), %rcx, %rbx
adcxq %rcx, %r12
# A[2] * B[2]
movq 16(%rsp), %rdx
mulxq 16(%rdi), %rdx, %rcx
adcxq %rbx, %r13
adoxq %rdx, %r12
# A[3] * B[3]
movq 24(%rsp), %rdx
adoxq %rcx, %r13
mulxq 24(%rdi), %rcx, %rbx
adoxq %r15, %r14
adcxq %rcx, %r14
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %r15
xorq %rbx, %rbx
adcxq %rdx, %r11
# A[3] * B[0]
movq 24(%rdi), %rdx
adcxq %rcx, %r12
mulxq (%rsp), %rdx, %rcx
adoxq %rdx, %r11
adoxq %rcx, %r12
# A[3] * B[2]
movq 24(%rdi), %rdx
mulxq 16(%rsp), %rdx, %rcx
adcxq %rdx, %r13
# A[2] * B[3]
movq 24(%rsp), %rdx
adcxq %rcx, %r14
mulxq 16(%rdi), %rcx, %rdx
adcxq %rbx, %r15
adoxq %rcx, %r13
adoxq %rdx, %r14
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rcx
addq %r15, %r11
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r11, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r11
xorq %rbx, %rbx
adoxq %rcx, %r8
mulxq %r12, %rcx, %r12
adcxq %rcx, %r8
adoxq %r12, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
adcxq %rbx, %r11
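# Canonical reduction: clear bit 255 and add 19 if it was set
# (2^255 = 19 mod p), then probe value+19 for a carry into bit 255 to
# detect a result in [p, 2^255) and subtract p once more if so,
# leaving the unique representative in [0, p).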
movq $0x7fffffffffffffff, %rbx
movq %r11, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rbx, %r11
addq %rdx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
movq $0x7fffffffffffffff, %rcx
movq %r8, %rdx
addq $19, %rdx
movq %r9, %rdx
adcq $0x00, %rdx
movq %r10, %rdx
adcq $0x00, %rdx
movq %r11, %rdx
adcq $0x00, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rcx, %r11
addq %rdx, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
xorq %rax, %rax
addq $0xb0, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size curve25519_base_avx2,.-curve25519_base_avx2
#endif /* __APPLE__ */
#endif /* WOLFSSL_CURVE25519_NOT_USE_ED25519 */
#ifndef __APPLE__
.text
.globl curve25519_avx2
.type curve25519_avx2,@function
.align 16
curve25519_avx2:
#else
.section __TEXT,__text
.globl _curve25519_avx2
.p2align 4
_curve25519_avx2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbp
movq %rdx, %r8
subq $0xb8, %rsp
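# Locals (a sketch of the layout as used below): field-element
# temporaries at 0..151(%rsp), ladder bit counter at 160(%rsp), saved
# output pointer at 168(%rsp), running swap bit at 176(%rsp).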
movq $0x00, 176(%rsp)
movq %rdi, 168(%rsp)
# Set one
movq $0x01, (%rdi)
movq $0x00, 8(%rdi)
movq $0x00, 16(%rdi)
movq $0x00, 24(%rdi)
# Set zero
movq $0x00, (%rsp)
movq $0x00, 8(%rsp)
movq $0x00, 16(%rsp)
movq $0x00, 24(%rsp)
# Set one
movq $0x01, 32(%rsp)
movq $0x00, 40(%rsp)
movq $0x00, 48(%rsp)
movq $0x00, 56(%rsp)
# Copy
movq (%r8), %r9
movq 8(%r8), %r10
movq 16(%r8), %r11
movq 24(%r8), %r12
movq %r9, 64(%rsp)
movq %r10, 72(%rsp)
movq %r11, 80(%rsp)
movq %r12, 88(%rsp)
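# Montgomery ladder for a generic point: (x2:z2) = (1:0),
# (x3:z3) = (x1:1), scanning scalar bits 254 down to 3 (bit 255 is
# cleared and bits 2..0 are zero after clamping).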
movq $0xfe, %rbx
L_curve25519_avx2_bits:
movq %rbx, 160(%rsp)
movq %rbx, %rcx
movq 176(%rsp), %rax
andq $63, %rcx
shrq $6, %rbx
movq (%rsi,%rbx,8), %rbx
shrq %cl, %rbx
andq $0x01, %rbx
xorq %rbx, %rax
movq %rbx, 176(%rsp)
negq %rax
# Conditional Swap
movq (%rdi), %r9
movq 8(%rdi), %r10
movq 16(%rdi), %r11
movq 24(%rdi), %r12
movq (%rsp), %r13
movq 8(%rsp), %r14
movq 16(%rsp), %r15
movq 24(%rsp), %rbp
xorq 64(%rsp), %r9
xorq 72(%rsp), %r10
xorq 80(%rsp), %r11
xorq 88(%rsp), %r12
xorq 32(%rsp), %r13
xorq 40(%rsp), %r14
xorq 48(%rsp), %r15
xorq 56(%rsp), %rbp
andq %rax, %r9
andq %rax, %r10
andq %rax, %r11
andq %rax, %r12
andq %rax, %r13
andq %rax, %r14
andq %rax, %r15
andq %rax, %rbp
xorq %r9, (%rdi)
xorq %r10, 8(%rdi)
xorq %r11, 16(%rdi)
xorq %r12, 24(%rdi)
xorq %r13, (%rsp)
xorq %r14, 8(%rsp)
xorq %r15, 16(%rsp)
xorq %rbp, 24(%rsp)
xorq %r9, 64(%rsp)
xorq %r10, 72(%rsp)
xorq %r11, 80(%rsp)
xorq %r12, 88(%rsp)
xorq %r13, 32(%rsp)
xorq %r14, 40(%rsp)
xorq %r15, 48(%rsp)
xorq %rbp, 56(%rsp)
# Add-Sub
# Add
movq (%rdi), %r9
movq 8(%rdi), %r10
movq 16(%rdi), %r11
movq 24(%rdi), %r12
movq %r9, %r13
addq (%rsp), %r9
movq %r10, %r14
adcq 8(%rsp), %r10
movq %r11, %r15
adcq 16(%rsp), %r11
movq %r12, %rbp
adcq 24(%rsp), %r12
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r12, %rbx
imulq $19, %rbx
btr $63, %r12
# Sub modulus (if overflow)
addq %rbx, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Sub
subq (%rsp), %r13
sbbq 8(%rsp), %r14
sbbq 16(%rsp), %r15
sbbq 24(%rsp), %rbp
sbbq %rbx, %rbx
shldq $0x01, %rbp, %rbx
imulq $-19, %rbx
btr $63, %rbp
# Add modulus (if underflow)
subq %rbx, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbp
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
movq %r13, 128(%rsp)
movq %r14, 136(%rsp)
movq %r15, 144(%rsp)
movq %rbp, 152(%rsp)
# Add-Sub
# Add
movq 64(%rsp), %r9
movq 72(%rsp), %r10
movq 80(%rsp), %r11
movq 88(%rsp), %r12
movq %r9, %r13
addq 32(%rsp), %r9
movq %r10, %r14
adcq 40(%rsp), %r10
movq %r11, %r15
adcq 48(%rsp), %r11
movq %r12, %rbp
adcq 56(%rsp), %r12
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r12, %rbx
imulq $19, %rbx
btr $63, %r12
# Sub modulus (if overflow)
addq %rbx, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Sub
subq 32(%rsp), %r13
sbbq 40(%rsp), %r14
sbbq 48(%rsp), %r15
sbbq 56(%rsp), %rbp
sbbq %rbx, %rbx
shldq $0x01, %rbp, %rbx
imulq $-19, %rbx
btr $63, %rbp
# Add modulus (if underflow)
subq %rbx, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbp
movq %r9, 32(%rsp)
movq %r10, 40(%rsp)
movq %r11, 48(%rsp)
movq %r12, 56(%rsp)
movq %r13, 96(%rsp)
movq %r14, 104(%rsp)
movq %r15, 112(%rsp)
movq %rbp, 120(%rsp)
movq 32(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 128(%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 48(%rsp), %r11, %r12
# A[1] * B[0]
mulxq 40(%rsp), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 136(%rsp), %rdx
mulxq 56(%rsp), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 48(%rsp), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 144(%rsp), %rdx
mulxq 40(%rsp), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 136(%rsp), %rdx
mulxq 40(%rsp), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r12
mulxq 40(%rsp), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 144(%rsp), %rdx
mulxq 48(%rsp), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r14
mulxq 56(%rsp), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 56(%rsp), %rdx
adcxq %rcx, %r13
mulxq 128(%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 56(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 152(%rsp), %rdx
adcxq %rcx, %r15
mulxq 48(%rsp), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, 32(%rsp)
movq %r10, 40(%rsp)
movq %r11, 48(%rsp)
movq %r12, 56(%rsp)
movq 96(%rsp), %rax
# Multiply
# A[0] * B[0]
movq (%rdi), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 112(%rsp), %r11, %r12
# A[1] * B[0]
mulxq 104(%rsp), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 8(%rdi), %rdx
mulxq 120(%rsp), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 112(%rsp), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 16(%rdi), %rdx
mulxq 104(%rsp), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 8(%rdi), %rdx
mulxq 104(%rsp), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r12
mulxq 104(%rsp), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 16(%rdi), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r14
mulxq 120(%rsp), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 120(%rsp), %rdx
adcxq %rcx, %r13
mulxq (%rdi), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 120(%rsp), %rdx
mulxq 16(%rdi), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 24(%rdi), %rdx
adcxq %rcx, %r15
mulxq 112(%rsp), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, (%rsp)
movq %r10, 8(%rsp)
movq %r11, 16(%rsp)
movq %r12, 24(%rsp)
# Square
movq 128(%rsp), %rdx
movq 136(%rsp), %rax
# A[0] * A[1]
movq %rdx, %rbp
mulxq %rax, %r10, %r11
# A[0] * A[3]
mulxq 152(%rsp), %r12, %r13
# A[2] * A[1]
movq 144(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r9, %r9
adoxq %rcx, %r12
# A[2] * A[3]
mulxq 152(%rsp), %r14, %r15
adoxq %rbx, %r13
# A[2] * A[0]
mulxq %rbp, %rcx, %rbx
adoxq %r9, %r14
adcxq %rcx, %r11
adoxq %r9, %r15
# A[1] * A[3]
movq %rax, %rdx
mulxq 152(%rsp), %rcx, %rdx
adcxq %rbx, %r12
adcxq %rcx, %r13
adcxq %rdx, %r14
adcxq %r9, %r15
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r9, %rcx
xorq %rbp, %rbp
adcxq %r10, %r10
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r10
mulxq %rdx, %rcx, %rbx
adcxq %r11, %r11
adoxq %rcx, %r11
adcxq %r12, %r12
# A[2] * A[2]
movq 144(%rsp), %rdx
adoxq %rbx, %r12
mulxq %rdx, %rbx, %rcx
adcxq %r13, %r13
adoxq %rbx, %r13
adcxq %r14, %r14
# A[3] * A[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r14
mulxq %rdx, %rcx, %rbx
adcxq %r15, %r15
adoxq %rcx, %r15
adcxq %rbp, %rbp
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rbx
addq %rbp, %r12
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r12, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r12
xorq %rcx, %rcx
adoxq %rbx, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
mulxq %r15, %rbx, %r15
adcxq %rbx, %r11
adoxq %r15, %r12
adcxq %rcx, %r12
# Store
movq %r9, 96(%rsp)
movq %r10, 104(%rsp)
movq %r11, 112(%rsp)
movq %r12, 120(%rsp)
# Square
movq (%rdi), %rdx
movq 8(%rdi), %rax
# A[0] * A[1]
movq %rdx, %rbp
mulxq %rax, %r10, %r11
# A[0] * A[3]
mulxq 24(%rdi), %r12, %r13
# A[2] * A[1]
movq 16(%rdi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r9, %r9
adoxq %rcx, %r12
# A[2] * A[3]
mulxq 24(%rdi), %r14, %r15
adoxq %rbx, %r13
# A[2] * A[0]
mulxq %rbp, %rcx, %rbx
adoxq %r9, %r14
adcxq %rcx, %r11
adoxq %r9, %r15
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rdi), %rcx, %rdx
adcxq %rbx, %r12
adcxq %rcx, %r13
adcxq %rdx, %r14
adcxq %r9, %r15
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r9, %rcx
xorq %rbp, %rbp
adcxq %r10, %r10
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r10
mulxq %rdx, %rcx, %rbx
adcxq %r11, %r11
adoxq %rcx, %r11
adcxq %r12, %r12
# A[2] * A[2]
movq 16(%rdi), %rdx
adoxq %rbx, %r12
mulxq %rdx, %rbx, %rcx
adcxq %r13, %r13
adoxq %rbx, %r13
adcxq %r14, %r14
# A[3] * A[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r14
mulxq %rdx, %rcx, %rbx
adcxq %r15, %r15
adoxq %rcx, %r15
adcxq %rbp, %rbp
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rbx
addq %rbp, %r12
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r12, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r12
xorq %rcx, %rcx
adoxq %rbx, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
mulxq %r15, %rbx, %r15
adcxq %rbx, %r11
adoxq %r15, %r12
adcxq %rcx, %r12
# Store
movq %r9, 128(%rsp)
movq %r10, 136(%rsp)
movq %r11, 144(%rsp)
movq %r12, 152(%rsp)
# Add-Sub
# Add
movq (%rsp), %r9
movq 8(%rsp), %r10
movq 16(%rsp), %r11
movq 24(%rsp), %r12
movq %r9, %r13
addq 32(%rsp), %r9
movq %r10, %r14
adcq 40(%rsp), %r10
movq %r11, %r15
adcq 48(%rsp), %r11
movq %r12, %rbp
adcq 56(%rsp), %r12
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r12, %rbx
imulq $19, %rbx
btr $63, %r12
# Sub modulus (if overflow)
addq %rbx, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Sub
subq 32(%rsp), %r13
sbbq 40(%rsp), %r14
sbbq 48(%rsp), %r15
sbbq 56(%rsp), %rbp
sbbq %rbx, %rbx
shldq $0x01, %rbp, %rbx
imulq $-19, %rbx
btr $63, %rbp
# Add modulus (if underflow)
subq %rbx, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbp
movq %r9, 64(%rsp)
movq %r10, 72(%rsp)
movq %r11, 80(%rsp)
movq %r12, 88(%rsp)
movq %r13, 32(%rsp)
movq %r14, 40(%rsp)
movq %r15, 48(%rsp)
movq %rbp, 56(%rsp)
movq 128(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 96(%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 144(%rsp), %r11, %r12
# A[1] * B[0]
mulxq 136(%rsp), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 104(%rsp), %rdx
mulxq 152(%rsp), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 144(%rsp), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 112(%rsp), %rdx
mulxq 136(%rsp), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 104(%rsp), %rdx
mulxq 136(%rsp), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r12
mulxq 136(%rsp), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 112(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r14
mulxq 152(%rsp), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 152(%rsp), %rdx
adcxq %rcx, %r13
mulxq 96(%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 152(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 120(%rsp), %rdx
adcxq %rcx, %r15
mulxq 144(%rsp), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
# Sub
movq 128(%rsp), %r9
movq 136(%rsp), %r10
movq 144(%rsp), %r11
movq 152(%rsp), %r12
subq 96(%rsp), %r9
sbbq 104(%rsp), %r10
sbbq 112(%rsp), %r11
sbbq 120(%rsp), %r12
sbbq %rbx, %rbx
shldq $0x01, %r12, %rbx
imulq $-19, %rbx
btr $63, %r12
# Add modulus (if underflow)
subq %rbx, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
sbbq $0x00, %r12
movq %r9, 128(%rsp)
movq %r10, 136(%rsp)
movq %r11, 144(%rsp)
movq %r12, 152(%rsp)
# Square
movq 32(%rsp), %rdx
movq 40(%rsp), %rax
# A[0] * A[1]
movq %rdx, %rbp
mulxq %rax, %r10, %r11
# A[0] * A[3]
mulxq 56(%rsp), %r12, %r13
# A[2] * A[1]
movq 48(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r9, %r9
adoxq %rcx, %r12
# A[2] * A[3]
mulxq 56(%rsp), %r14, %r15
adoxq %rbx, %r13
# A[2] * A[0]
mulxq %rbp, %rcx, %rbx
adoxq %r9, %r14
adcxq %rcx, %r11
adoxq %r9, %r15
# A[1] * A[3]
movq %rax, %rdx
mulxq 56(%rsp), %rcx, %rdx
adcxq %rbx, %r12
adcxq %rcx, %r13
adcxq %rdx, %r14
adcxq %r9, %r15
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r9, %rcx
xorq %rbp, %rbp
adcxq %r10, %r10
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r10
mulxq %rdx, %rcx, %rbx
adcxq %r11, %r11
adoxq %rcx, %r11
adcxq %r12, %r12
# A[2] * A[2]
movq 48(%rsp), %rdx
adoxq %rbx, %r12
mulxq %rdx, %rbx, %rcx
adcxq %r13, %r13
adoxq %rbx, %r13
adcxq %r14, %r14
# A[3] * A[3]
movq 56(%rsp), %rdx
adoxq %rcx, %r14
mulxq %rdx, %rcx, %rbx
adcxq %r15, %r15
adoxq %rcx, %r15
adcxq %rbp, %rbp
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rbx
addq %rbp, %r12
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r12, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r12
xorq %rcx, %rcx
adoxq %rbx, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
mulxq %r15, %rbx, %r15
adcxq %rbx, %r11
adoxq %r15, %r12
adcxq %rcx, %r12
# Store
movq %r9, 32(%rsp)
movq %r10, 40(%rsp)
movq %r11, 48(%rsp)
movq %r12, 56(%rsp)
# Square
movq 64(%rsp), %rdx
movq 72(%rsp), %rax
# A[0] * A[1]
movq %rdx, %rbp
mulxq %rax, %r10, %r11
# A[0] * A[3]
mulxq 88(%rsp), %r12, %r13
# A[2] * A[1]
movq 80(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r9, %r9
adoxq %rcx, %r12
# A[2] * A[3]
mulxq 88(%rsp), %r14, %r15
adoxq %rbx, %r13
# A[2] * A[0]
mulxq %rbp, %rcx, %rbx
adoxq %r9, %r14
adcxq %rcx, %r11
adoxq %r9, %r15
# A[1] * A[3]
movq %rax, %rdx
mulxq 88(%rsp), %rcx, %rdx
adcxq %rbx, %r12
adcxq %rcx, %r13
adcxq %rdx, %r14
adcxq %r9, %r15
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r9, %rcx
xorq %rbp, %rbp
adcxq %r10, %r10
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r10
mulxq %rdx, %rcx, %rbx
adcxq %r11, %r11
adoxq %rcx, %r11
adcxq %r12, %r12
# A[2] * A[2]
movq 80(%rsp), %rdx
adoxq %rbx, %r12
mulxq %rdx, %rbx, %rcx
adcxq %r13, %r13
adoxq %rbx, %r13
adcxq %r14, %r14
# A[3] * A[3]
movq 88(%rsp), %rdx
adoxq %rcx, %r14
mulxq %rdx, %rcx, %rbx
adcxq %r15, %r15
adoxq %rcx, %r15
adcxq %rbp, %rbp
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rbx
addq %rbp, %r12
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r12, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r12
xorq %rcx, %rcx
adoxq %rbx, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
mulxq %r15, %rbx, %r15
adcxq %rbx, %r11
adoxq %r15, %r12
adcxq %rcx, %r12
# Store
movq %r9, 64(%rsp)
movq %r10, 72(%rsp)
movq %r11, 80(%rsp)
movq %r12, 88(%rsp)
movq $0x1db42, %rdx
mulxq 128(%rsp), %r9, %rbp
mulxq 136(%rsp), %r10, %r15
mulxq 144(%rsp), %r11, %r14
addq %rbp, %r10
mulxq 152(%rsp), %r12, %r13
adcq %r15, %r11
adcq %r14, %r12
adcq $0x00, %r13
addq 96(%rsp), %r9
adcq 104(%rsp), %r10
adcq 112(%rsp), %r11
adcq 120(%rsp), %r12
adcq $0x00, %r13
shldq $0x01, %r12, %r13
btr $63, %r12
imulq $19, %r13, %r13
addq %r13, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
movq %r9, 96(%rsp)
movq %r10, 104(%rsp)
movq %r11, 112(%rsp)
movq %r12, 120(%rsp)
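# Multiply by x1, read directly from the caller's point at (%r8); the
# base-point variant replaces this with the constant 9.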
movq (%r8), %rax
# Multiply
# A[0] * B[0]
movq 32(%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 16(%r8), %r11, %r12
# A[1] * B[0]
mulxq 8(%r8), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 40(%rsp), %rdx
mulxq 24(%r8), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 16(%r8), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 48(%rsp), %rdx
mulxq 8(%r8), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 40(%rsp), %rdx
mulxq 8(%r8), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 56(%rsp), %rdx
adoxq %rcx, %r12
mulxq 8(%r8), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 48(%rsp), %rdx
mulxq 16(%r8), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 56(%rsp), %rdx
adoxq %rcx, %r14
mulxq 24(%r8), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 24(%r8), %rdx
adcxq %rcx, %r13
mulxq 32(%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 24(%r8), %rdx
mulxq 48(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 56(%rsp), %rdx
adcxq %rcx, %r15
mulxq 16(%r8), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, 32(%rsp)
movq %r10, 40(%rsp)
movq %r11, 48(%rsp)
movq %r12, 56(%rsp)
movq 96(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 128(%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 112(%rsp), %r11, %r12
# A[1] * B[0]
mulxq 104(%rsp), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 136(%rsp), %rdx
mulxq 120(%rsp), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 112(%rsp), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 144(%rsp), %rdx
mulxq 104(%rsp), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 136(%rsp), %rdx
mulxq 104(%rsp), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r12
mulxq 104(%rsp), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 144(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r14
mulxq 120(%rsp), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 120(%rsp), %rdx
adcxq %rcx, %r13
mulxq 128(%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 120(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 152(%rsp), %rdx
adcxq %rcx, %r15
mulxq 112(%rsp), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, (%rsp)
movq %r10, 8(%rsp)
movq %r11, 16(%rsp)
movq %r12, 24(%rsp)
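# The step above uses %rbp and %rbx as limb temporaries, so the bit
# counter is spilled to 160(%rsp) instead of living in a register.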
movq 160(%rsp), %rbx
decq %rbx
cmpq $3, %rbx
jge L_curve25519_avx2_bits
movq $2, 160(%rsp)
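# Apply the final conditional swap, then run the three doubling-only
# tail steps (counter preset to 2).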
movq 176(%rsp), %rax
negq %rax
# Conditional Swap
movq (%rdi), %r9
movq 8(%rdi), %r10
movq 16(%rdi), %r11
movq 24(%rdi), %r12
movq (%rsp), %r13
movq 8(%rsp), %r14
movq 16(%rsp), %r15
movq 24(%rsp), %rbp
xorq 64(%rsp), %r9
xorq 72(%rsp), %r10
xorq 80(%rsp), %r11
xorq 88(%rsp), %r12
xorq 32(%rsp), %r13
xorq 40(%rsp), %r14
xorq 48(%rsp), %r15
xorq 56(%rsp), %rbp
andq %rax, %r9
andq %rax, %r10
andq %rax, %r11
andq %rax, %r12
andq %rax, %r13
andq %rax, %r14
andq %rax, %r15
andq %rax, %rbp
xorq %r9, (%rdi)
xorq %r10, 8(%rdi)
xorq %r11, 16(%rdi)
xorq %r12, 24(%rdi)
xorq %r13, (%rsp)
xorq %r14, 8(%rsp)
xorq %r15, 16(%rsp)
xorq %rbp, 24(%rsp)
xorq %r9, 64(%rsp)
xorq %r10, 72(%rsp)
xorq %r11, 80(%rsp)
xorq %r12, 88(%rsp)
xorq %r13, 32(%rsp)
xorq %r14, 40(%rsp)
xorq %r15, 48(%rsp)
xorq %rbp, 56(%rsp)
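# As in the base-point variant: the last three scalar bits are zero,
# so only the doubling half of each remaining step is computed.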
L_curve25519_avx2_last_3:
# Add-Sub
# Add
movq (%rdi), %r9
movq 8(%rdi), %r10
movq 16(%rdi), %r11
movq 24(%rdi), %r12
movq %r9, %r13
addq (%rsp), %r9
movq %r10, %r14
adcq 8(%rsp), %r10
movq %r11, %r15
adcq 16(%rsp), %r11
movq %r12, %rbp
adcq 24(%rsp), %r12
movq $0x00, %rbx
adcq $0x00, %rbx
shldq $0x01, %r12, %rbx
imulq $19, %rbx
btr $63, %r12
# Sub modulus (if overflow)
addq %rbx, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Sub
subq (%rsp), %r13
sbbq 8(%rsp), %r14
sbbq 16(%rsp), %r15
sbbq 24(%rsp), %rbp
sbbq %rbx, %rbx
shldq $0x01, %rbp, %rbx
imulq $-19, %rbx
btr $63, %rbp
# Add modulus (if underflow)
subq %rbx, %r13
sbbq $0x00, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbp
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
movq %r13, 128(%rsp)
movq %r14, 136(%rsp)
movq %r15, 144(%rsp)
movq %rbp, 152(%rsp)
# Square
movq 128(%rsp), %rdx
movq 136(%rsp), %rax
# A[0] * A[1]
movq %rdx, %rbp
mulxq %rax, %r10, %r11
# A[0] * A[3]
mulxq 152(%rsp), %r12, %r13
# A[2] * A[1]
movq 144(%rsp), %rdx
mulxq %rax, %rcx, %rbx
xorq %r9, %r9
adoxq %rcx, %r12
# A[2] * A[3]
mulxq 152(%rsp), %r14, %r15
adoxq %rbx, %r13
# A[2] * A[0]
mulxq %rbp, %rcx, %rbx
adoxq %r9, %r14
adcxq %rcx, %r11
adoxq %r9, %r15
# A[1] * A[3]
movq %rax, %rdx
mulxq 152(%rsp), %rcx, %rdx
adcxq %rbx, %r12
adcxq %rcx, %r13
adcxq %rdx, %r14
adcxq %r9, %r15
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r9, %rcx
xorq %rbp, %rbp
adcxq %r10, %r10
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r10
mulxq %rdx, %rcx, %rbx
adcxq %r11, %r11
adoxq %rcx, %r11
adcxq %r12, %r12
# A[2] * A[2]
movq 144(%rsp), %rdx
adoxq %rbx, %r12
mulxq %rdx, %rbx, %rcx
adcxq %r13, %r13
adoxq %rbx, %r13
adcxq %r14, %r14
# A[3] * A[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r14
mulxq %rdx, %rcx, %rbx
adcxq %r15, %r15
adoxq %rcx, %r15
adcxq %rbp, %rbp
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rbx
addq %rbp, %r12
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r12, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r12
xorq %rcx, %rcx
adoxq %rbx, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
mulxq %r15, %rbx, %r15
adcxq %rbx, %r11
adoxq %r15, %r12
adcxq %rcx, %r12
# Store
movq %r9, 96(%rsp)
movq %r10, 104(%rsp)
movq %r11, 112(%rsp)
movq %r12, 120(%rsp)
# Square
movq (%rdi), %rdx
movq 8(%rdi), %rax
# A[0] * A[1]
movq %rdx, %rbp
mulxq %rax, %r10, %r11
# A[0] * A[3]
mulxq 24(%rdi), %r12, %r13
# A[2] * A[1]
movq 16(%rdi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r9, %r9
adoxq %rcx, %r12
# A[2] * A[3]
mulxq 24(%rdi), %r14, %r15
adoxq %rbx, %r13
# A[2] * A[0]
mulxq %rbp, %rcx, %rbx
adoxq %r9, %r14
adcxq %rcx, %r11
adoxq %r9, %r15
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rdi), %rcx, %rdx
adcxq %rbx, %r12
adcxq %rcx, %r13
adcxq %rdx, %r14
adcxq %r9, %r15
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r9, %rcx
xorq %rbp, %rbp
adcxq %r10, %r10
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r10
mulxq %rdx, %rcx, %rbx
adcxq %r11, %r11
adoxq %rcx, %r11
adcxq %r12, %r12
# A[2] * A[2]
movq 16(%rdi), %rdx
adoxq %rbx, %r12
mulxq %rdx, %rbx, %rcx
adcxq %r13, %r13
adoxq %rbx, %r13
adcxq %r14, %r14
# A[3] * A[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r14
mulxq %rdx, %rcx, %rbx
adcxq %r15, %r15
adoxq %rcx, %r15
adcxq %rbp, %rbp
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rbx
addq %rbp, %r12
adcq $0x00, %rbx
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r12, %rbx
imulq $19, %rbx, %rbx
andq %rcx, %r12
xorq %rcx, %rcx
adoxq %rbx, %r9
mulxq %r13, %rbx, %r13
adcxq %rbx, %r9
adoxq %r13, %r10
mulxq %r14, %rbx, %r14
adcxq %rbx, %r10
adoxq %r14, %r11
mulxq %r15, %rbx, %r15
adcxq %rbx, %r11
adoxq %r15, %r12
adcxq %rcx, %r12
# Store
movq %r9, 128(%rsp)
movq %r10, 136(%rsp)
movq %r11, 144(%rsp)
movq %r12, 152(%rsp)
movq 128(%rsp), %rax
# Multiply
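# 4x4-limb schoolbook product, again MULX with dual ADCX/ADOX carry
# chains: A[0] is cached in %rax, each B-limb is loaded into %rdx
# (MULX's implicit multiplicand), and the high half of the product
# is folded back with weight 38 as in the squaring above.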
# A[0] * B[0]
movq 96(%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 144(%rsp), %r11, %r12
# A[1] * B[0]
mulxq 136(%rsp), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 104(%rsp), %rdx
mulxq 152(%rsp), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 144(%rsp), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 112(%rsp), %rdx
mulxq 136(%rsp), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 104(%rsp), %rdx
mulxq 136(%rsp), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r12
mulxq 136(%rsp), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 112(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 120(%rsp), %rdx
adoxq %rcx, %r14
mulxq 152(%rsp), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 152(%rsp), %rdx
adcxq %rcx, %r13
mulxq 96(%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 152(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 120(%rsp), %rdx
adcxq %rcx, %r15
mulxq 144(%rsp), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
# Sub
movq 128(%rsp), %r9
movq 136(%rsp), %r10
movq 144(%rsp), %r11
movq 152(%rsp), %r12
subq 96(%rsp), %r9
sbbq 104(%rsp), %r10
sbbq 112(%rsp), %r11
sbbq 120(%rsp), %r12
sbbq %rbx, %rbx
shldq $0x01, %r12, %rbx
imulq $-19, %rbx
btr $63, %r12
# Add modulus (if underflow)
subq %rbx, %r9
sbbq $0x00, %r10
sbbq $0x00, %r11
sbbq $0x00, %r12
movq %r9, 128(%rsp)
movq %r10, 136(%rsp)
movq %r11, 144(%rsp)
movq %r12, 152(%rsp)
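# Multiply by a24 = 0x1db42 = 121666, the (A+2)/4 constant of
# curve25519 (A = 486662): this forms the AA + a24*E term of the
# Montgomery ladder step, which is multiplied by E just below.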
movq $0x1db42, %rdx
mulxq 128(%rsp), %r9, %rbp
mulxq 136(%rsp), %r10, %r15
mulxq 144(%rsp), %r11, %r14
addq %rbp, %r10
mulxq 152(%rsp), %r12, %r13
adcq %r15, %r11
adcq %r14, %r12
adcq $0x00, %r13
addq 96(%rsp), %r9
adcq 104(%rsp), %r10
adcq 112(%rsp), %r11
adcq 120(%rsp), %r12
adcq $0x00, %r13
shldq $0x01, %r12, %r13
btr $63, %r12
imulq $19, %r13, %r13
addq %r13, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
movq %r9, 96(%rsp)
movq %r10, 104(%rsp)
movq %r11, 112(%rsp)
movq %r12, 120(%rsp)
movq 96(%rsp), %rax
# Multiply
# A[0] * B[0]
movq 128(%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 112(%rsp), %r11, %r12
# A[1] * B[0]
mulxq 104(%rsp), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 136(%rsp), %rdx
mulxq 120(%rsp), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 112(%rsp), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 144(%rsp), %rdx
mulxq 104(%rsp), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 136(%rsp), %rdx
mulxq 104(%rsp), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r12
mulxq 104(%rsp), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 144(%rsp), %rdx
mulxq 112(%rsp), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 152(%rsp), %rdx
adoxq %rcx, %r14
mulxq 120(%rsp), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 120(%rsp), %rdx
adcxq %rcx, %r13
mulxq 128(%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 120(%rsp), %rdx
mulxq 144(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 152(%rsp), %rdx
adcxq %rcx, %r15
mulxq 112(%rsp), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
# Store
movq %r9, (%rsp)
movq %r10, 8(%rsp)
movq %r11, 16(%rsp)
movq %r12, 24(%rsp)
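# Final ladder iterations (counter at 160(%rsp)): X25519 clamping
# clears the low three scalar bits, so these last three steps can
# run without a conditional swap.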
decq 160(%rsp)
jge L_curve25519_avx2_last_3
# Invert
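# z^-1 = z^(p-2) = z^(2^255 - 21) by Fermat's little theorem,
# computed with the standard chain of 254 squarings and 11
# multiplies over the stack temporaries at 0/32/64/96/128(%rsp).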
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
movq %rsp, %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 128(%rsp), %rdi
leaq 128(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 128(%rsp), %rsi
leaq 96(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 96(%rsp), %rdi
leaq 96(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 96(%rsp), %rsi
leaq 64(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq 168(%rsp), %rdi
movq (%rdi), %rax
# Multiply
# A[0] * B[0]
movq (%rsp), %rdx
mulxq %rax, %r9, %r10
# A[2] * B[0]
mulxq 16(%rdi), %r11, %r12
# A[1] * B[0]
mulxq 8(%rdi), %rcx, %rbx
xorq %rbp, %rbp
adcxq %rcx, %r10
# A[3] * B[1]
movq 8(%rsp), %rdx
mulxq 24(%rdi), %r13, %r14
adcxq %rbx, %r11
# A[0] * B[1]
mulxq %rax, %rcx, %rbx
adoxq %rcx, %r10
# A[2] * B[1]
mulxq 16(%rdi), %rcx, %r15
adoxq %rbx, %r11
adcxq %rcx, %r12
# A[1] * B[2]
movq 16(%rsp), %rdx
mulxq 8(%rdi), %rcx, %rbx
adcxq %r15, %r13
adoxq %rcx, %r12
adcxq %rbp, %r14
adoxq %rbx, %r13
# A[0] * B[2]
mulxq %rax, %rcx, %rbx
adoxq %rbp, %r14
xorq %r15, %r15
adcxq %rcx, %r11
# A[1] * B[1]
movq 8(%rsp), %rdx
mulxq 8(%rdi), %rdx, %rcx
adcxq %rbx, %r12
adoxq %rdx, %r11
# A[1] * B[3]
movq 24(%rsp), %rdx
adoxq %rcx, %r12
mulxq 8(%rdi), %rcx, %rbx
adcxq %rcx, %r13
# A[2] * B[2]
movq 16(%rsp), %rdx
mulxq 16(%rdi), %rdx, %rcx
adcxq %rbx, %r14
adoxq %rdx, %r13
# A[3] * B[3]
movq 24(%rsp), %rdx
adoxq %rcx, %r14
mulxq 24(%rdi), %rcx, %rbx
adoxq %rbp, %r15
adcxq %rcx, %r15
# A[0] * B[3]
mulxq %rax, %rdx, %rcx
adcxq %rbx, %rbp
xorq %rbx, %rbx
adcxq %rdx, %r12
# A[3] * B[0]
movq 24(%rdi), %rdx
adcxq %rcx, %r13
mulxq (%rsp), %rdx, %rcx
adoxq %rdx, %r12
adoxq %rcx, %r13
# A[3] * B[2]
movq 24(%rdi), %rdx
mulxq 16(%rsp), %rdx, %rcx
adcxq %rdx, %r14
# A[2] * B[3]
movq 24(%rsp), %rdx
adcxq %rcx, %r15
mulxq 16(%rdi), %rcx, %rdx
adcxq %rbx, %rbp
adoxq %rcx, %r14
adoxq %rdx, %r15
adoxq %rbx, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r12
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %rbx
shldq $0x01, %r12, %rcx
imulq $19, %rcx, %rcx
andq %rbx, %r12
xorq %rbx, %rbx
adoxq %rcx, %r9
mulxq %r13, %rcx, %r13
adcxq %rcx, %r9
adoxq %r13, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
adcxq %rbx, %r12
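# Canonicalize: fold bit 255 (worth 19) back in, then trial-add 19
# through the limbs; if that add would set bit 255 the value is
# >= p, so 19 is added and the top bit cleared (subtracting p).
# Both passes are branch-free.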
movq $0x7fffffffffffffff, %rbx
movq %r12, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rbx, %r12
addq %rdx, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
movq $0x7fffffffffffffff, %rcx
movq %r9, %rdx
addq $19, %rdx
movq %r10, %rdx
adcq $0x00, %rdx
movq %r11, %rdx
adcq $0x00, %rdx
movq %r12, %rdx
adcq $0x00, %rdx
sarq $63, %rdx
andq $19, %rdx
andq %rcx, %r12
addq %rdx, %r9
adcq $0x00, %r10
adcq $0x00, %r11
adcq $0x00, %r12
# Store
movq %r9, (%rdi)
movq %r10, 8(%rdi)
movq %r11, 16(%rdi)
movq %r12, 24(%rdi)
xorq %rax, %rax
addq $0xb8, %rsp
popq %rbp
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size curve25519_avx2,.-curve25519_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl fe_pow22523_avx2
.type fe_pow22523_avx2,@function
.align 16
fe_pow22523_avx2:
#else
.section __TEXT,__text
.globl _fe_pow22523_avx2
.p2align 4
_fe_pow22523_avx2:
#endif /* __APPLE__ */
subq $0x70, %rsp
# pow22523
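# Compute z^((p-5)/8) = z^(2^252 - 3), used for the square-root
# step of Ed25519 point decompression; the chain matches the
# inversion up to z^(2^250 - 1), then 2 squarings and a multiply
# by z.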
movq %rdi, 96(%rsp)
movq %rsi, 104(%rsp)
movq %rsp, %rdi
movq 104(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq 104(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $4, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $19, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $9, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 64(%rsp), %rdi
leaq 64(%rsp), %rsi
movq $0x63, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 64(%rsp), %rsi
leaq 32(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
leaq 32(%rsp), %rdi
leaq 32(%rsp), %rsi
movq $49, %rdx
#ifndef __APPLE__
callq fe_sq_n_avx2@plt
#else
callq _fe_sq_n_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
leaq 32(%rsp), %rsi
movq %rsp, %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
movq %rsp, %rdi
movq %rsp, %rsi
#ifndef __APPLE__
callq fe_sq_avx2@plt
#else
callq _fe_sq_avx2
#endif /* __APPLE__ */
movq 96(%rsp), %rdi
movq %rsp, %rsi
movq 104(%rsp), %rdx
#ifndef __APPLE__
callq fe_mul_avx2@plt
#else
callq _fe_mul_avx2
#endif /* __APPLE__ */
movq 104(%rsp), %rsi
movq 96(%rsp), %rdi
addq $0x70, %rsp
repz retq
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p2_avx2
.type ge_p1p1_to_p2_avx2,@function
.align 16
ge_p1p1_to_p2_avx2:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p2_avx2
.p2align 4
_ge_p1p1_to_p2_avx2:
#endif /* __APPLE__ */
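# Convert completed (P1xP1) to projective (P2) coordinates:
# X3 = X*T, Y3 = Y*Z, Z3 = Z*T.  Each coordinate is four 64-bit
# limbs, 32 bytes apart in the X|Y|Z|T layout.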
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
subq $16, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
leaq 96(%rsi), %rax
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 64(%rsi), %rsi
leaq 64(%rdi), %rdi
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq -32(%rsi), %rax
leaq -32(%rdi), %rdi
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
addq $16, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_p1p1_to_p2_avx2,.-ge_p1p1_to_p2_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p1p1_to_p3_avx2
.type ge_p1p1_to_p3_avx2,@function
.align 16
ge_p1p1_to_p3_avx2:
#else
.section __TEXT,__text
.globl _ge_p1p1_to_p3_avx2
.p2align 4
_ge_p1p1_to_p3_avx2:
#endif /* __APPLE__ */
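# Convert completed (P1xP1) to extended (P3) coordinates: as for
# P2 (X3 = X*T, Y3 = Y*Z, Z3 = Z*T) plus T3 = X*Y.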
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
subq $16, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
leaq 96(%rsi), %rax
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 32(%rsi), %rax
leaq 96(%rdi), %rdi
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 64(%rsi), %rsi
leaq -64(%rdi), %rdi
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 32(%rsi), %rax
leaq 32(%rdi), %rdi
movq (%rsi), %r9
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq %r9, %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rcx, %r8
xorq %rbp, %rbp
adcxq %rcx, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r8, %r12
# A[0] * B[1]
mulxq %r9, %rcx, %r8
adoxq %rcx, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rcx, %rbx
adoxq %r8, %r12
adcxq %rcx, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %rcx, %r8
adcxq %rbx, %r14
adoxq %rcx, %r13
adcxq %rbp, %r15
adoxq %r8, %r14
# A[0] * B[2]
mulxq %r9, %rcx, %r8
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rcx, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %rcx
adcxq %r8, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r13
mulxq 8(%rsi), %rcx, %r8
adcxq %rcx, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %rcx
adcxq %r8, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq 24(%rsi), %rcx, %r8
adoxq %rbp, %rbx
adcxq %rcx, %rbx
# A[0] * B[3]
mulxq %r9, %rdx, %rcx
adcxq %r8, %rbp
xorq %r8, %r8
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rcx, %r14
mulxq (%rax), %rdx, %rcx
adoxq %rdx, %r13
adoxq %rcx, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %rcx
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %rcx, %rbx
mulxq 16(%rsi), %rcx, %rdx
adcxq %r8, %rbp
adoxq %rcx, %r15
adoxq %rdx, %rbx
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %rcx
addq %rbp, %r13
adcq $0x00, %rcx
movq $0x7fffffffffffffff, %r8
shldq $0x01, %r13, %rcx
imulq $19, %rcx, %rcx
andq %r8, %r13
xorq %r8, %r8
adoxq %rcx, %r10
mulxq %r14, %rcx, %r14
adcxq %rcx, %r10
adoxq %r14, %r11
mulxq %r15, %rcx, %r15
adcxq %rcx, %r11
adoxq %r15, %r12
mulxq %rbx, %rcx, %rbx
adcxq %rcx, %r12
adoxq %rbx, %r13
adcxq %r8, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
addq $16, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_p1p1_to_p3_avx2,.-ge_p1p1_to_p3_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_p2_dbl_avx2
.type ge_p2_dbl_avx2,@function
.align 16
ge_p2_dbl_avx2:
#else
.section __TEXT,__text
.globl _ge_p2_dbl_avx2
.p2align 4
_ge_p2_dbl_avx2:
#endif /* __APPLE__ */
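# Double a projective (P2) point into P1xP1 form (ref10-style):
# X3 = (X1+Y1)^2 - Y1^2 - X1^2 = 2*X1*Y1, Y3 = Y1^2 + X1^2,
# Z3 = Y1^2 - X1^2, T3 = 2*Z1^2 - Z3.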
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
subq $16, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
leaq 64(%rdi), %rdi
# Square
movq (%rsi), %rdx
movq 8(%rsi), %r9
# A[0] * A[1]
movq %rdx, %rbp
mulxq %r9, %r11, %r12
# A[0] * A[3]
mulxq 24(%rsi), %r13, %r14
# A[2] * A[1]
movq 16(%rsi), %rdx
mulxq %r9, %rcx, %r8
xorq %r10, %r10
adoxq %rcx, %r13
# A[2] * A[3]
mulxq 24(%rsi), %r15, %rbx
adoxq %r8, %r14
# A[2] * A[0]
mulxq %rbp, %rcx, %r8
adoxq %r10, %r15
adcxq %rcx, %r12
adoxq %r10, %rbx
# A[1] * A[3]
movq %r9, %rdx
mulxq 24(%rsi), %rcx, %rdx
adcxq %r8, %r13
adcxq %rcx, %r14
adcxq %rdx, %r15
adcxq %r10, %rbx
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r10, %rcx
xorq %rbp, %rbp
adcxq %r11, %r11
# A[1] * A[1]
movq %r9, %rdx
adoxq %rcx, %r11
mulxq %rdx, %rcx, %r8
adcxq %r12, %r12
adoxq %rcx, %r12
adcxq %r13, %r13
# A[2] * A[2]
movq 16(%rsi), %rdx
adoxq %r8, %r13
mulxq %rdx, %r8, %rcx
adcxq %r14, %r14
adoxq %r8, %r14
adcxq %r15, %r15
# A[3] * A[3]
movq 24(%rsi), %rdx
adoxq %rcx, %r15
mulxq %rdx, %rcx, %r8
adcxq %rbx, %rbx
adoxq %rcx, %rbx
adcxq %rbp, %rbp
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %rcx, %r13
xorq %rcx, %rcx
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %rcx, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 32(%rsi), %rsi
# Square
movq (%rsi), %rdx
movq 8(%rsi), %r9
# A[0] * A[1]
movq %rdx, %rbp
mulxq %r9, %r11, %r12
# A[0] * A[3]
mulxq 24(%rsi), %r13, %r14
# A[2] * A[1]
movq 16(%rsi), %rdx
mulxq %r9, %rcx, %r8
xorq %r10, %r10
adoxq %rcx, %r13
# A[2] * A[3]
mulxq 24(%rsi), %r15, %rbx
adoxq %r8, %r14
# A[2] * A[0]
mulxq %rbp, %rcx, %r8
adoxq %r10, %r15
adcxq %rcx, %r12
adoxq %r10, %rbx
# A[1] * A[3]
movq %r9, %rdx
mulxq 24(%rsi), %rcx, %rdx
adcxq %r8, %r13
adcxq %rcx, %r14
adcxq %rdx, %r15
adcxq %r10, %rbx
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r10, %rcx
xorq %rbp, %rbp
adcxq %r11, %r11
# A[1] * A[1]
movq %r9, %rdx
adoxq %rcx, %r11
mulxq %rdx, %rcx, %r8
adcxq %r12, %r12
adoxq %rcx, %r12
adcxq %r13, %r13
# A[2] * A[2]
movq 16(%rsi), %rdx
adoxq %r8, %r13
mulxq %rdx, %r8, %rcx
adcxq %r14, %r14
adoxq %r8, %r14
adcxq %r15, %r15
# A[3] * A[3]
movq 24(%rsi), %rdx
adoxq %rcx, %r15
mulxq %rdx, %rcx, %r8
adcxq %rbx, %rbx
adoxq %rcx, %rbx
adcxq %rbp, %rbp
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %rcx, %r13
xorq %rcx, %rcx
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %rcx, %r13
# Store (elided: result stays in registers for the add-sub below)
movq %rdi, %rsi
leaq -32(%rdi), %rdi
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 8(%rsp), %rax
leaq 32(%rax), %rsi
leaq -32(%rdi), %rdi
# Add
movq (%rsi), %r10
movq 8(%rsi), %r11
addq (%rax), %r10
movq 16(%rsi), %r12
adcq 8(%rax), %r11
movq 24(%rsi), %r13
adcq 16(%rax), %r12
adcq 24(%rax), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
# Square
movq (%rdi), %rdx
movq 8(%rdi), %r9
# A[0] * A[1]
movq %rdx, %rbp
mulxq %r9, %r11, %r12
# A[0] * A[3]
mulxq 24(%rdi), %r13, %r14
# A[2] * A[1]
movq 16(%rdi), %rdx
mulxq %r9, %rcx, %r8
xorq %r10, %r10
adoxq %rcx, %r13
# A[2] * A[3]
mulxq 24(%rdi), %r15, %rbx
adoxq %r8, %r14
# A[2] * A[0]
mulxq %rbp, %rcx, %r8
adoxq %r10, %r15
adcxq %rcx, %r12
adoxq %r10, %rbx
# A[1] * A[3]
movq %r9, %rdx
mulxq 24(%rdi), %rcx, %rdx
adcxq %r8, %r13
adcxq %rcx, %r14
adcxq %rdx, %r15
adcxq %r10, %rbx
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r10, %rcx
xorq %rbp, %rbp
adcxq %r11, %r11
# A[1] * A[1]
movq %r9, %rdx
adoxq %rcx, %r11
mulxq %rdx, %rcx, %r8
adcxq %r12, %r12
adoxq %rcx, %r12
adcxq %r13, %r13
# A[2] * A[2]
movq 16(%rdi), %rdx
adoxq %r8, %r13
mulxq %rdx, %r8, %rcx
adcxq %r14, %r14
adoxq %r8, %r14
adcxq %r15, %r15
# A[3] * A[3]
movq 24(%rdi), %rdx
adoxq %rcx, %r15
mulxq %rdx, %rcx, %r8
adcxq %rbx, %rbx
adoxq %rcx, %rbx
adcxq %rbp, %rbp
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %rcx, %r13
xorq %rcx, %rcx
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %rcx, %r13
# Store (elided: result stays in registers for the subtraction below)
leaq 32(%rdi), %rsi
# Sub
subq (%rsi), %r10
sbbq 8(%rsi), %r11
sbbq 16(%rsi), %r12
sbbq 24(%rsi), %r13
sbbq %rdx, %rdx
shldq $0x01, %r13, %rdx
imulq $-19, %rdx
btr $63, %r13
# Add modulus (if underflow)
subq %rdx, %r10
sbbq $0x00, %r11
sbbq $0x00, %r12
sbbq $0x00, %r13
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 64(%rax), %rax
# Square * 2
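# Square then double mod p: after reduction the four limbs are
# shifted left one bit as a whole, and the two bits pushed past
# bit 255 are folded back with weight 19.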
movq (%rax), %rdx
movq 8(%rax), %r9
# A[0] * A[1]
movq %rdx, %rbp
mulxq %r9, %r11, %r12
# A[0] * A[3]
mulxq 24(%rax), %r13, %r14
# A[2] * A[1]
movq 16(%rax), %rdx
mulxq %r9, %rcx, %r8
xorq %r10, %r10
adoxq %rcx, %r13
# A[2] * A[3]
mulxq 24(%rax), %r15, %rbx
adoxq %r8, %r14
# A[2] * A[0]
mulxq %rbp, %rcx, %r8
adoxq %r10, %r15
adcxq %rcx, %r12
adoxq %r10, %rbx
# A[1] * A[3]
movq %r9, %rdx
mulxq 24(%rax), %rcx, %rdx
adcxq %r8, %r13
adcxq %rcx, %r14
adcxq %rdx, %r15
adcxq %r10, %rbx
# A[0] * A[0]
movq %rbp, %rdx
mulxq %rdx, %r10, %rcx
xorq %rbp, %rbp
adcxq %r11, %r11
# A[1] * A[1]
movq %r9, %rdx
adoxq %rcx, %r11
mulxq %rdx, %rcx, %r8
adcxq %r12, %r12
adoxq %rcx, %r12
adcxq %r13, %r13
# A[2] * A[2]
movq 16(%rax), %rdx
adoxq %r8, %r13
mulxq %rdx, %r8, %rcx
adcxq %r14, %r14
adoxq %r8, %r14
adcxq %r15, %r15
# A[3] * A[3]
movq 24(%rax), %rdx
adoxq %rcx, %r15
mulxq %rdx, %rcx, %r8
adcxq %rbx, %rbx
adoxq %rcx, %rbx
adcxq %rbp, %rbp
adoxq %r8, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r9
addq %rbp, %r13
adcq $0x00, %r9
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r13, %r9
imulq $19, %r9, %r9
andq %rcx, %r13
xorq %rcx, %rcx
adoxq %r9, %r10
mulxq %r14, %r9, %r14
adcxq %r9, %r10
adoxq %r14, %r11
mulxq %r15, %r9, %r15
adcxq %r9, %r11
adoxq %r15, %r12
mulxq %rbx, %r9, %rbx
adcxq %r9, %r12
adoxq %rbx, %r13
adcxq %rcx, %r13
movq %r13, %r9
shldq $0x01, %r12, %r13
shldq $0x01, %r11, %r12
shldq $0x01, %r10, %r11
shlq $0x01, %r10
movq $0x7fffffffffffffff, %rcx
shrq $62, %r9
andq %rcx, %r13
imulq $19, %r9, %r9
addq %r9, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Store (elided: result stays in registers for the subtraction below)
leaq 64(%rdi), %rsi
leaq 96(%rdi), %rdi
# Sub
subq (%rsi), %r10
sbbq 8(%rsi), %r11
sbbq 16(%rsi), %r12
sbbq 24(%rsi), %r13
sbbq %rdx, %rdx
shldq $0x01, %r13, %rdx
imulq $-19, %rdx
btr $63, %r13
# Add modulus (if underflow)
subq %rdx, %r10
sbbq $0x00, %r11
sbbq $0x00, %r12
sbbq $0x00, %r13
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
addq $16, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_p2_dbl_avx2,.-ge_p2_dbl_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_madd_avx2
.type ge_madd_avx2,@function
.align 16
ge_madd_avx2:
#else
.section __TEXT,__text
.globl _ge_madd_avx2
.p2align 4
_ge_madd_avx2:
#endif /* __APPLE__ */
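# Mixed addition of a precomputed point (y+x, y-x, 2*d*x*y) to a
# P3 point, P1xP1 result: with A = (Y1+X1)*(y+x),
# B = (Y1-X1)*(y-x) and C = T1*xy2d, the outputs are X3 = A-B,
# Y3 = A+B, Z3 = 2*Z1 + C, T3 = 2*Z1 - C.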
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rax
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rax, 16(%rsp)
leaq 96(%rsi), %rcx
leaq 64(%rax), %rax
leaq 96(%rdi), %rdi
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rcx), %r10, %r11
# A[2] * B[0]
mulxq 16(%rcx), %r12, %r13
# A[1] * B[0]
mulxq 8(%rcx), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rcx), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rcx), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rcx), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rcx), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rcx), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rcx), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rcx), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rcx), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rcx), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rcx), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rcx), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rcx), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rcx), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %rsi, %rcx
leaq 32(%rsi), %rax
leaq -64(%rdi), %rsi
leaq -96(%rdi), %rdi
# Add-Sub
# Add
movq (%rax), %r10
movq 8(%rax), %r11
movq 16(%rax), %r12
movq 24(%rax), %r13
movq %r10, %r14
addq (%rcx), %r10
movq %r11, %r15
adcq 8(%rcx), %r11
movq %r12, %rbx
adcq 16(%rcx), %r12
movq %r13, %rbp
adcq 24(%rcx), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rcx), %r14
sbbq 8(%rcx), %r15
sbbq 16(%rcx), %rbx
sbbq 24(%rcx), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rdi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rdi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rdi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rdi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rdi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rdi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rdi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rdi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rdi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rdi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rdi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rdi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rdi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rdi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rdi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rdi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rsi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rsi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rsi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rsi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rsi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rsi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rsi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rsi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
# Add-Sub
# Add
movq (%rdi), %r10
movq 8(%rdi), %r11
movq 16(%rdi), %r12
movq 24(%rdi), %r13
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
leaq 64(%rcx), %rcx
# Double
movq (%rcx), %r10
movq 8(%rcx), %r11
addq %r10, %r10
movq 16(%rcx), %r12
adcq %r11, %r11
movq 24(%rcx), %r13
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
leaq 96(%rdi), %rsi
leaq 64(%rdi), %rdi
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_madd_avx2,.-ge_madd_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_msub_avx2
.type ge_msub_avx2,@function
.align 16
ge_msub_avx2:
#else
.section __TEXT,__text
.globl _ge_msub_avx2
.p2align 4
_ge_msub_avx2:
#endif /* __APPLE__ */
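# Mixed subtraction: as ge_madd but with the precomputed point
# negated, so (Y1+X1) is multiplied by (y-x), (Y1-X1) by (y+x),
# and Z3/T3 swap roles: Z3 = 2*Z1 - C, T3 = 2*Z1 + C.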
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rax
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rax, 16(%rsp)
leaq 96(%rsi), %rcx
leaq 64(%rax), %rax
leaq 96(%rdi), %rdi
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rcx), %r10, %r11
# A[2] * B[0]
mulxq 16(%rcx), %r12, %r13
# A[1] * B[0]
mulxq 8(%rcx), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rcx), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rcx), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rcx), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rcx), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rcx), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rcx), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rcx), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rcx), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rcx), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rcx), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rcx), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rcx), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rcx), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %rsi, %rcx
leaq 32(%rsi), %rax
leaq -64(%rdi), %rsi
leaq -96(%rdi), %rdi
# Add-Sub
# Add
movq (%rax), %r10
movq 8(%rax), %r11
movq 16(%rax), %r12
movq 24(%rax), %r13
movq %r10, %r14
addq (%rcx), %r10
movq %r11, %r15
adcq 8(%rcx), %r11
movq %r12, %rbx
adcq 16(%rcx), %r12
movq %r13, %rbp
adcq 24(%rcx), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rcx), %r14
sbbq 8(%rcx), %r15
sbbq 16(%rcx), %rbx
sbbq 24(%rcx), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rax
leaq 32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rdi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rdi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rdi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rdi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rdi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rdi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rdi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rdi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rdi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rdi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rdi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rdi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rdi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rdi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rdi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rdi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq -32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rsi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rsi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rsi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rsi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rsi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rsi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rsi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rsi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
# Add-Sub
# Add
movq (%rdi), %r10
movq 8(%rdi), %r11
movq 16(%rdi), %r12
movq 24(%rdi), %r13
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
leaq 64(%rcx), %rcx
# Double
movq (%rcx), %r10
movq 8(%rcx), %r11
addq %r10, %r10
movq 16(%rcx), %r12
adcq %r11, %r11
movq 24(%rcx), %r13
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
leaq 96(%rdi), %rsi
leaq 64(%rdi), %rdi
# Add-Sub
# Add
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_msub_avx2,.-ge_msub_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.text
.globl ge_add_avx2
.type ge_add_avx2,@function
.align 16
ge_add_avx2:
#else
.section __TEXT,__text
.globl _ge_add_avx2
.p2align 4
_ge_add_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rax
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rax, 16(%rsp)
leaq 96(%rsi), %rcx
leaq 96(%rax), %rax
leaq 96(%rdi), %rdi
# Multiply
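# Schoolbook 4x4-word product using mulx, with adcx and adox carrying
# through the CF and OF flags independently so two accumulation chains
# can be interleaved without flag dependencies.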
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rcx), %r10, %r11
# A[2] * B[0]
mulxq 16(%rcx), %r12, %r13
# A[1] * B[0]
mulxq 8(%rcx), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rcx), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rcx), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rcx), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rcx), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rcx), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rcx), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rcx), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rcx), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rcx), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rcx), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rcx), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rcx), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rcx), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %rsi, %rcx
leaq 32(%rsi), %rax
leaq -64(%rdi), %rsi
leaq -96(%rdi), %rdi
# Add-Sub
# Add
movq (%rax), %r10
movq 8(%rax), %r11
movq 16(%rax), %r12
movq 24(%rax), %r13
movq %r10, %r14
addq (%rcx), %r10
movq %r11, %r15
adcq 8(%rcx), %r11
movq %r12, %rbx
adcq 16(%rcx), %r12
movq %r13, %rbp
adcq 24(%rcx), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rcx), %r14
sbbq 8(%rcx), %r15
sbbq 16(%rcx), %rbx
sbbq 24(%rcx), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rdi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rdi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rdi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rdi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rdi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rdi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rdi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rdi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rdi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rdi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rdi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rdi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rdi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rdi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rdi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rdi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq 32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rsi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rsi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rsi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rsi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rsi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rsi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rsi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rsi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
leaq 64(%rcx), %rcx
leaq 32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rcx), %r10, %r11
# A[2] * B[0]
mulxq 16(%rcx), %r12, %r13
# A[1] * B[0]
mulxq 8(%rcx), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rcx), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rcx), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rcx), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rcx), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rcx), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rcx), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rcx), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rcx), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rcx), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rcx), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rcx), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rcx), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rcx), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
leaq 64(%rdi), %rdi
# Double
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq -64(%rdi), %rdi
# Add-Sub
# Add
movq (%rdi), %r10
movq 8(%rdi), %r11
movq 16(%rdi), %r12
movq 24(%rdi), %r13
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
leaq 96(%rdi), %rsi
leaq 64(%rdi), %rdi
# Add-Sub
# Add
movq (%rdi), %r10
movq 8(%rdi), %r11
movq 16(%rdi), %r12
movq 24(%rdi), %r13
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_add_avx2,.-ge_add_avx2
#endif /* __APPLE__ */
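/* Ed25519 point subtraction (AVX2/BMI2). Structurally identical to
 * ge_add_avx2 above; only operand offsets and the final add/sub
 * ordering differ, swapping the roles of the cached point's sum and
 * difference terms (as in the ref10 ge_sub formulas).
 */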
#ifndef __APPLE__
.text
.globl ge_sub_avx2
.type ge_sub_avx2,@function
.align 16
ge_sub_avx2:
#else
.section __TEXT,__text
.globl _ge_sub_avx2
.p2align 4
_ge_sub_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %rax
subq $24, %rsp
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
movq %rax, 16(%rsp)
leaq 96(%rsi), %rcx
leaq 96(%rax), %rax
leaq 96(%rdi), %rdi
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rcx), %r10, %r11
# A[2] * B[0]
mulxq 16(%rcx), %r12, %r13
# A[1] * B[0]
mulxq 8(%rcx), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rcx), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rcx), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rcx), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rcx), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rcx), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rcx), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rcx), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rcx), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rcx), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rcx), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rcx), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rcx), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rcx), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %rsi, %rcx
leaq 32(%rsi), %rax
leaq -64(%rdi), %rsi
leaq -96(%rdi), %rdi
# Add-Sub
# Add
movq (%rax), %r10
movq 8(%rax), %r11
movq 16(%rax), %r12
movq 24(%rax), %r13
movq %r10, %r14
addq (%rcx), %r10
movq %r11, %r15
adcq 8(%rcx), %r11
movq %r12, %rbx
adcq 16(%rcx), %r12
movq %r13, %rbp
adcq 24(%rcx), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rcx), %r14
sbbq 8(%rcx), %r15
sbbq 16(%rcx), %rbx
sbbq 24(%rcx), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
movq 16(%rsp), %rax
leaq 32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rdi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rdi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rdi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rdi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rdi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rdi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rdi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rdi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rdi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rdi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rdi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rdi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rdi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rdi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rdi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rdi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq -32(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rsi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rsi), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rsi), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rsi), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rsi), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rsi), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rsi), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rsi), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rsi), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rsi), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rsi), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
leaq 64(%rcx), %rcx
leaq 64(%rax), %rax
# Multiply
# A[0] * B[0]
movq (%rax), %rdx
mulxq (%rcx), %r10, %r11
# A[2] * B[0]
mulxq 16(%rcx), %r12, %r13
# A[1] * B[0]
mulxq 8(%rcx), %r8, %r9
xorq %rbp, %rbp
adcxq %r8, %r11
# A[3] * B[1]
movq 8(%rax), %rdx
mulxq 24(%rcx), %r14, %r15
adcxq %r9, %r12
# A[0] * B[1]
mulxq (%rcx), %r8, %r9
adoxq %r8, %r11
# A[2] * B[1]
mulxq 16(%rcx), %r8, %rbx
adoxq %r9, %r12
adcxq %r8, %r13
# A[1] * B[2]
movq 16(%rax), %rdx
mulxq 8(%rcx), %r8, %r9
adcxq %rbx, %r14
adoxq %r8, %r13
adcxq %rbp, %r15
adoxq %r9, %r14
# A[0] * B[2]
mulxq (%rcx), %r8, %r9
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %r8, %r12
# A[1] * B[1]
movq 8(%rax), %rdx
mulxq 8(%rcx), %rdx, %r8
adcxq %r9, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r13
mulxq 8(%rcx), %r8, %r9
adcxq %r8, %r14
# A[2] * B[2]
movq 16(%rax), %rdx
mulxq 16(%rcx), %rdx, %r8
adcxq %r9, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%rax), %rdx
adoxq %r8, %r15
mulxq 24(%rcx), %r8, %r9
adoxq %rbp, %rbx
adcxq %r8, %rbx
# A[0] * B[3]
mulxq (%rcx), %rdx, %r8
adcxq %r9, %rbp
xorq %r9, %r9
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rcx), %rdx
adcxq %r8, %r14
mulxq (%rax), %rdx, %r8
adoxq %rdx, %r13
adoxq %r8, %r14
# A[3] * B[2]
movq 24(%rcx), %rdx
mulxq 16(%rax), %rdx, %r8
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%rax), %rdx
adcxq %r8, %rbx
mulxq 16(%rcx), %r8, %rdx
adcxq %r9, %rbp
adoxq %r8, %r15
adoxq %rdx, %rbx
adoxq %r9, %rbp
movq $38, %rdx
mulxq %rbp, %rbp, %r8
addq %rbp, %r13
adcq $0x00, %r8
movq $0x7fffffffffffffff, %r9
shldq $0x01, %r13, %r8
imulq $19, %r8, %r8
andq %r9, %r13
xorq %r9, %r9
adoxq %r8, %r10
mulxq %r14, %r8, %r14
adcxq %r8, %r10
adoxq %r14, %r11
mulxq %r15, %r8, %r15
adcxq %r8, %r11
adoxq %r15, %r12
mulxq %rbx, %r8, %rbx
adcxq %r8, %r12
adoxq %rbx, %r13
adcxq %r9, %r13
# Store
leaq 64(%rdi), %rdi
# Double
addq %r10, %r10
adcq %r11, %r11
adcq %r12, %r12
adcq %r13, %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
leaq -64(%rdi), %rdi
# Add-Sub
# Add
movq (%rdi), %r10
movq 8(%rdi), %r11
movq 16(%rdi), %r12
movq 24(%rdi), %r13
movq %r10, %r14
addq (%rsi), %r10
movq %r11, %r15
adcq 8(%rsi), %r11
movq %r12, %rbx
adcq 16(%rsi), %r12
movq %r13, %rbp
adcq 24(%rsi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rsi), %r14
sbbq 8(%rsi), %r15
sbbq 16(%rsi), %rbx
sbbq 24(%rsi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rsi)
movq %r11, 8(%rsi)
movq %r12, 16(%rsi)
movq %r13, 24(%rsi)
movq %r14, (%rdi)
movq %r15, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rbp, 24(%rdi)
leaq 64(%rdi), %rsi
leaq 96(%rdi), %rdi
# Add-Sub
# Add
movq (%rsi), %r10
movq 8(%rsi), %r11
movq 16(%rsi), %r12
movq 24(%rsi), %r13
movq %r10, %r14
addq (%rdi), %r10
movq %r11, %r15
adcq 8(%rdi), %r11
movq %r12, %rbx
adcq 16(%rdi), %r12
movq %r13, %rbp
adcq 24(%rdi), %r13
movq $0x00, %rdx
adcq $0x00, %rdx
shldq $0x01, %r13, %rdx
imulq $19, %rdx
btr $63, %r13
# Sub modulus (if overflow)
addq %rdx, %r10
adcq $0x00, %r11
adcq $0x00, %r12
adcq $0x00, %r13
# Sub
subq (%rdi), %r14
sbbq 8(%rdi), %r15
sbbq 16(%rdi), %rbx
sbbq 24(%rdi), %rbp
sbbq %rdx, %rdx
shldq $0x01, %rbp, %rdx
imulq $-19, %rdx
btr $63, %rbp
# Add modulus (if underflow)
subq %rdx, %r14
sbbq $0x00, %r15
sbbq $0x00, %rbx
sbbq $0x00, %rbp
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
movq %r14, (%rsi)
movq %r15, 8(%rsi)
movq %rbx, 16(%rsi)
movq %rbp, 24(%rsi)
addq $24, %rsp
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size ge_sub_avx2,.-ge_sub_avx2
#endif /* __APPLE__ */
#ifdef HAVE_ED25519
#ifndef __APPLE__
.text
.globl fe_sq2_avx2
.type fe_sq2_avx2,@function
.align 16
fe_sq2_avx2:
#else
.section __TEXT,__text
.globl _fe_sq2_avx2
.p2align 4
_fe_sq2_avx2:
#endif /* __APPLE__ */
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
# Square * 2
movq (%rsi), %rdx
movq 8(%rsi), %rax
# A[0] * A[1]
movq %rdx, %r15
mulxq %rax, %r9, %r10
# A[0] * A[3]
mulxq 24(%rsi), %r11, %r12
# A[2] * A[1]
movq 16(%rsi), %rdx
mulxq %rax, %rcx, %rbx
xorq %r8, %r8
adoxq %rcx, %r11
# A[2] * A[3]
mulxq 24(%rsi), %r13, %r14
adoxq %rbx, %r12
# A[2] * A[0]
mulxq %r15, %rcx, %rbx
adoxq %r8, %r13
adcxq %rcx, %r10
adoxq %r8, %r14
# A[1] * A[3]
movq %rax, %rdx
mulxq 24(%rsi), %rcx, %rdx
adcxq %rbx, %r11
adcxq %rcx, %r12
adcxq %rdx, %r13
adcxq %r8, %r14
# A[0] * A[0]
movq %r15, %rdx
mulxq %rdx, %r8, %rcx
xorq %r15, %r15
adcxq %r9, %r9
# A[1] * A[1]
movq %rax, %rdx
adoxq %rcx, %r9
mulxq %rdx, %rcx, %rbx
adcxq %r10, %r10
adoxq %rcx, %r10
adcxq %r11, %r11
# A[2] * A[2]
movq 16(%rsi), %rdx
adoxq %rbx, %r11
mulxq %rdx, %rbx, %rcx
adcxq %r12, %r12
adoxq %rbx, %r12
adcxq %r13, %r13
# A[3] * A[3]
movq 24(%rsi), %rdx
adoxq %rcx, %r13
mulxq %rdx, %rcx, %rbx
adcxq %r14, %r14
adoxq %rcx, %r14
adcxq %r15, %r15
adoxq %rbx, %r15
movq $38, %rdx
mulxq %r15, %r15, %rax
addq %r15, %r11
adcq $0x00, %rax
movq $0x7fffffffffffffff, %rcx
shldq $0x01, %r11, %rax
imulq $19, %rax, %rax
andq %rcx, %r11
xorq %rcx, %rcx
adoxq %rax, %r8
mulxq %r12, %rax, %r12
adcxq %rax, %r8
adoxq %r12, %r9
mulxq %r13, %rax, %r13
adcxq %rax, %r9
adoxq %r13, %r10
mulxq %r14, %rax, %r14
adcxq %rax, %r10
adoxq %r14, %r11
adcxq %rcx, %r11
movq %r11, %rax
shldq $0x01, %r10, %r11
shldq $0x01, %r9, %r10
shldq $0x01, %r8, %r9
shlq $0x01, %r8
movq $0x7fffffffffffffff, %rcx
shrq $62, %rax
andq %rcx, %r11
imulq $19, %rax, %rax
addq %rax, %r8
adcq $0x00, %r9
adcq $0x00, %r10
adcq $0x00, %r11
# Store
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
repz retq
#ifndef __APPLE__
.size fe_sq2_avx2,.-fe_sq2_avx2
#endif /* __APPLE__ */
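/* sc_reduce_avx2: reduce the 512-bit value at [rdi] modulo the Ed25519
 * group order L = 2^252 + 0x14def9dea2f79cd65812631a5cf5d3ed and store
 * the 256-bit result back to [rdi].
 */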
#ifndef __APPLE__
.text
.globl sc_reduce_avx2
.type sc_reduce_avx2,@function
.align 16
sc_reduce_avx2:
#else
.section __TEXT,__text
.globl _sc_reduce_avx2
.p2align 4
_sc_reduce_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq (%rdi), %r8
movq 8(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq 32(%rdi), %r12
movq 40(%rdi), %r13
movq 48(%rdi), %r14
movq 56(%rdi), %r15
movq %r15, %rax
movq $0xfffffffffffffff, %rcx
shrq $56, %rax
shldq $4, %r14, %r15
shldq $4, %r13, %r14
shldq $4, %r12, %r13
shldq $4, %r11, %r12
andq %rcx, %r11
andq %rcx, %r15
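# 0xeb2106215d086329:0xa7ed9ce5a30a2c13 is the two's complement of the
# order's low 128 bits (L - 2^252), so multiply-accumulating with these
# words subtracts a multiple of L.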
# Add order times bits 504..511
subq %rax, %r14
sbbq $0x00, %r15
movq $0xeb2106215d086329, %rdx
mulxq %rax, %rsi, %rcx
movq $0xa7ed9ce5a30a2c13, %rdx
addq %rsi, %r13
mulxq %rax, %rsi, %rbx
adcq $0x00, %rcx
addq %rsi, %r12
adcq %rbx, %r13
adcq %rcx, %r14
adcq $0x00, %r15
# Sub product of top 4 words and order
movq $0xa7ed9ce5a30a2c13, %rdx
mulxq %r12, %rcx, %rax
addq %rcx, %r8
adcq %rax, %r9
mulxq %r14, %rcx, %rax
adcq %rcx, %r10
adcq %rax, %r11
movq $0x00, %rsi
adcq $0x00, %rsi
mulxq %r13, %rcx, %rax
addq %rcx, %r9
adcq %rax, %r10
mulxq %r15, %rcx, %rax
adcq %rcx, %r11
adcq %rax, %rsi
movq $0xeb2106215d086329, %rdx
mulxq %r12, %rcx, %rax
addq %rcx, %r9
adcq %rax, %r10
mulxq %r14, %rcx, %rax
adcq %rcx, %r11
adcq %rax, %rsi
movq $0x00, %rbx
adcq $0x00, %rbx
mulxq %r13, %rcx, %rax
addq %rcx, %r10
adcq %rax, %r11
mulxq %r15, %rcx, %rax
adcq %rcx, %rsi
adcq %rax, %rbx
subq %r12, %r10
movq %rsi, %r12
sbbq %r13, %r11
movq %rbx, %r13
sbbq %r14, %r12
sbbq %r15, %r13
movq %r13, %rax
sarq $57, %rax
# Conditionally subtract order starting at bit 125
movq $0xa000000000000000, %rsi
movq $0xcb024c634b9eba7d, %rbx
movq $0x29bdf3bd45ef39a, %rbp
movq $0x200000000000000, %rcx
andq %rax, %rsi
andq %rax, %rbx
andq %rax, %rbp
andq %rax, %rcx
addq %rsi, %r9
adcq %rbx, %r10
adcq %rbp, %r11
adcq $0x00, %r12
adcq %rcx, %r13
# Move bits 252-376 to own registers
movq $0xfffffffffffffff, %rax
shldq $4, %r12, %r13
shldq $4, %r11, %r12
andq %rax, %r11
# Sub product of top 2 words and order
# * -5812631a5cf5d3ed
movq $0xa7ed9ce5a30a2c13, %rdx
mulxq %r12, %rbp, %rax
movq $0x00, %rsi
addq %rbp, %r8
adcq %rax, %r9
mulxq %r13, %rbp, %rax
adcq $0x00, %rsi
addq %rbp, %r9
adcq %rax, %rsi
# * -14def9dea2f79cd7
movq $0xeb2106215d086329, %rdx
mulxq %r12, %rbp, %rax
movq $0x00, %rbx
addq %rbp, %r9
adcq %rax, %r10
mulxq %r13, %rbp, %rax
adcq $0x00, %rbx
addq %rbp, %r10
adcq %rax, %rbx
# Add overflows at 2 * 64
movq $0xfffffffffffffff, %rcx
andq %rcx, %r11
addq %rsi, %r10
adcq %rbx, %r11
# Subtract top at 2 * 64
subq %r12, %r10
sbbq %r13, %r11
sbbq %rcx, %rcx
# Conditional sub order
movq $0x5812631a5cf5d3ed, %rsi
movq $0x14def9dea2f79cd6, %rbx
movq $0x1000000000000000, %rbp
andq %rcx, %rsi
andq %rcx, %rbx
andq %rcx, %rbp
addq %rsi, %r8
movq $0xfffffffffffffff, %rsi
adcq %rbx, %r9
adcq $0x00, %r10
adcq %rbp, %r11
andq %rsi, %r11
# Store result
movq %r8, (%rdi)
movq %r9, 8(%rdi)
movq %r10, 16(%rdi)
movq %r11, 24(%rdi)
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sc_reduce_avx2,.-sc_reduce_avx2
#endif /* __APPLE__ */
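/* sc_muladd_avx2: r = (a * b + c) mod L, L the Ed25519 group order
 * (rdi = r, rsi = a, rdx = b, rcx = c). Forms the full 4x4-word
 * product of a and b, adds c, then reduces the 512-bit sum exactly as
 * in sc_reduce_avx2 above.
 */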
#ifndef __APPLE__
.text
.globl sc_muladd_avx2
.type sc_muladd_avx2,@function
.align 16
sc_muladd_avx2:
#else
.section __TEXT,__text
.globl _sc_muladd_avx2
.p2align 4
_sc_muladd_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
pushq %rbp
movq %rdx, %r8
movq %rcx, %r9
# Multiply
# A[0] * B[0]
movq (%r8), %rdx
mulxq (%rsi), %r10, %r11
# A[2] * B[0]
mulxq 16(%rsi), %r12, %r13
# A[1] * B[0]
mulxq 8(%rsi), %rax, %rcx
xorq %rbp, %rbp
adcxq %rax, %r11
# A[3] * B[1]
movq 8(%r8), %rdx
mulxq 24(%rsi), %r14, %r15
adcxq %rcx, %r12
# A[0] * B[1]
mulxq (%rsi), %rax, %rcx
adoxq %rax, %r11
# A[2] * B[1]
mulxq 16(%rsi), %rax, %rbx
adoxq %rcx, %r12
adcxq %rax, %r13
# A[1] * B[2]
movq 16(%r8), %rdx
mulxq 8(%rsi), %rax, %rcx
adcxq %rbx, %r14
adoxq %rax, %r13
adcxq %rbp, %r15
adoxq %rcx, %r14
# A[0] * B[2]
mulxq (%rsi), %rax, %rcx
adoxq %rbp, %r15
xorq %rbx, %rbx
adcxq %rax, %r12
# A[1] * B[1]
movq 8(%r8), %rdx
mulxq 8(%rsi), %rdx, %rax
adcxq %rcx, %r13
adoxq %rdx, %r12
# A[1] * B[3]
movq 24(%r8), %rdx
adoxq %rax, %r13
mulxq 8(%rsi), %rax, %rcx
adcxq %rax, %r14
# A[2] * B[2]
movq 16(%r8), %rdx
mulxq 16(%rsi), %rdx, %rax
adcxq %rcx, %r15
adoxq %rdx, %r14
# A[3] * B[3]
movq 24(%r8), %rdx
adoxq %rax, %r15
mulxq 24(%rsi), %rax, %rcx
adoxq %rbp, %rbx
adcxq %rax, %rbx
# A[0] * B[3]
mulxq (%rsi), %rdx, %rax
adcxq %rcx, %rbp
xorq %rcx, %rcx
adcxq %rdx, %r13
# A[3] * B[0]
movq 24(%rsi), %rdx
adcxq %rax, %r14
mulxq (%r8), %rdx, %rax
adoxq %rdx, %r13
adoxq %rax, %r14
# A[3] * B[2]
movq 24(%rsi), %rdx
mulxq 16(%r8), %rdx, %rax
adcxq %rdx, %r15
# A[2] * B[3]
movq 24(%r8), %rdx
adcxq %rax, %rbx
mulxq 16(%rsi), %rax, %rdx
adcxq %rcx, %rbp
adoxq %rax, %r15
adoxq %rdx, %rbx
adoxq %rcx, %rbp
# Add c to a * b
addq (%r9), %r10
adcq 8(%r9), %r11
adcq 16(%r9), %r12
adcq 24(%r9), %r13
adcq $0x00, %r14
adcq $0x00, %r15
adcq $0x00, %rbx
adcq $0x00, %rbp
movq %rbp, %rax
movq $0xfffffffffffffff, %rcx
shrq $56, %rax
shldq $4, %rbx, %rbp
shldq $4, %r15, %rbx
shldq $4, %r14, %r15
shldq $4, %r13, %r14
andq %rcx, %r13
andq %rcx, %rbp
# Add order times bits 504..507
subq %rax, %rbx
sbbq $0x00, %rbp
movq $0xeb2106215d086329, %rdx
mulxq %rax, %rsi, %rcx
movq $0xa7ed9ce5a30a2c13, %rdx
addq %rsi, %r15
mulxq %rax, %rsi, %r8
adcq $0x00, %rcx
addq %rsi, %r14
adcq %r8, %r15
adcq %rcx, %rbx
adcq $0x00, %rbp
# Sub product of top 4 words and order
movq $0xa7ed9ce5a30a2c13, %rdx
mulxq %r14, %rcx, %rax
addq %rcx, %r10
adcq %rax, %r11
mulxq %rbx, %rcx, %rax
adcq %rcx, %r12
adcq %rax, %r13
movq $0x00, %rsi
adcq $0x00, %rsi
mulxq %r15, %rcx, %rax
addq %rcx, %r11
adcq %rax, %r12
mulxq %rbp, %rcx, %rax
adcq %rcx, %r13
adcq %rax, %rsi
movq $0xeb2106215d086329, %rdx
mulxq %r14, %rcx, %rax
addq %rcx, %r11
adcq %rax, %r12
mulxq %rbx, %rcx, %rax
adcq %rcx, %r13
adcq %rax, %rsi
movq $0x00, %r8
adcq $0x00, %r8
mulxq %r15, %rcx, %rax
addq %rcx, %r12
adcq %rax, %r13
mulxq %rbp, %rcx, %rax
adcq %rcx, %rsi
adcq %rax, %r8
subq %r14, %r12
movq %rsi, %r14
sbbq %r15, %r13
movq %r8, %r15
sbbq %rbx, %r14
sbbq %rbp, %r15
movq %r15, %rax
sarq $57, %rax
# Conditionally subtract order starting at bit 125
movq $0xa000000000000000, %rsi
movq $0xcb024c634b9eba7d, %r8
movq $0x29bdf3bd45ef39a, %r9
movq $0x200000000000000, %rcx
andq %rax, %rsi
andq %rax, %r8
andq %rax, %r9
andq %rax, %rcx
addq %rsi, %r11
adcq %r8, %r12
adcq %r9, %r13
adcq $0x00, %r14
adcq %rcx, %r15
# Move bits 252-376 to own registers
movq $0xfffffffffffffff, %rax
shldq $4, %r14, %r15
shldq $4, %r13, %r14
andq %rax, %r13
# Sub product of top 2 words and order
# * -5812631a5cf5d3ed
movq $0xa7ed9ce5a30a2c13, %rdx
mulxq %r14, %r9, %rax
movq $0x00, %rsi
addq %r9, %r10
adcq %rax, %r11
mulxq %r15, %r9, %rax
adcq $0x00, %rsi
addq %r9, %r11
adcq %rax, %rsi
# * -14def9dea2f79cd7
movq $0xeb2106215d086329, %rdx
mulxq %r14, %r9, %rax
movq $0x00, %r8
addq %r9, %r11
adcq %rax, %r12
mulxq %r15, %r9, %rax
adcq $0x00, %r8
addq %r9, %r12
adcq %rax, %r8
# Add overflows at 2 * 64
movq $0xfffffffffffffff, %rcx
andq %rcx, %r13
addq %rsi, %r12
adcq %r8, %r13
# Subtract top at 2 * 64
subq %r14, %r12
sbbq %r15, %r13
sbbq %rcx, %rcx
# Conditional sub order
movq $0x5812631a5cf5d3ed, %rsi
movq $0x14def9dea2f79cd6, %r8
movq $0x1000000000000000, %r9
andq %rcx, %rsi
andq %rcx, %r8
andq %rcx, %r9
addq %rsi, %r10
movq $0xfffffffffffffff, %rsi
adcq %r8, %r11
adcq $0x00, %r12
adcq %r9, %r13
andq %rsi, %r13
# Store result
movq %r10, (%rdi)
movq %r11, 8(%rdi)
movq %r12, 16(%rdi)
movq %r13, 24(%rdi)
popq %rbp
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size sc_muladd_avx2,.-sc_muladd_avx2
#endif /* __APPLE__ */
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
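/* Constant tables for fe_invert_nct_avx2 below. Values are kept in two
 * 256-bit vectors as ten signed 26-bit limbs; judging from the halving
 * code, the even-indexed limbs live in the first vector and the
 * odd-indexed limbs in the second. The prime 2^255 - 19 follows that
 * split; the remaining tables hold the bit-0 masks, vpermd index
 * vectors and sign mask used by the halving and sign tests.
 */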
L_sp_mod_inv_avx2__prime:
.long 0x03ffffed,0x03ffffff,0x03ffffff,0x03ffffff
.long 0x03ffffff,0x00000000,0x00000000,0x00000000
.long 0x03ffffff,0x03ffffff,0x03ffffff,0x03ffffff
.long 0x001fffff,0x00000000,0x00000000,0x00000000
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
#ifndef __APPLE__
.align 32
#else
.p2align 5
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__one:
.quad 0x1, 0x0
.quad 0x0, 0x0
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__all_one:
.long 0x00000001,0x00000001,0x00000001,0x00000001
.long 0x00000001,0x00000001,0x00000001,0x00000001
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__mask01111:
.long 0x00000000,0x00000001,0x00000001,0x00000001
.long 0x00000001,0x00000000,0x00000000,0x00000000
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__down_one_dword:
.long 0x00000001,0x00000002,0x00000003,0x00000004
.long 0x00000005,0x00000006,0x00000007,0x00000007
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__neg:
.long 0x00000000,0x00000000,0x00000000,0x00000000
.long 0x80000000,0x00000000,0x00000000,0x00000000
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__up_one_dword:
.long 0x00000007,0x00000000,0x00000001,0x00000002
.long 0x00000003,0x00000007,0x00000007,0x00000007
#ifndef __APPLE__
.data
#else
.section __DATA,__data
#endif /* __APPLE__ */
L_sp_mod_inv_avx2__mask26:
.long 0x03ffffff,0x03ffffff,0x03ffffff,0x03ffffff
.long 0x03ffffff,0x00000000,0x00000000,0x00000000
/* Non-constant time modular inversion modulo 2^255 - 19.
 *
 * The modulus is fixed: it is loaded from L_sp_mod_inv_avx2__prime
 * rather than read from a register argument.
 *
 * @param [out] r Resulting number.
 * @param [in] a Number to invert.
 * @return MP_OKAY on success.
 */
#ifndef __APPLE__
.text
.globl fe_invert_nct_avx2
.type fe_invert_nct_avx2,@function
.align 16
fe_invert_nct_avx2:
#else
.section __TEXT,__text
.globl _fe_invert_nct_avx2
.p2align 4
_fe_invert_nct_avx2:
#endif /* __APPLE__ */
pushq %r12
pushq %r13
pushq %r14
pushq %r15
pushq %rbx
movq $-19, %rax
movq $-1, %rcx
movq $-1, %r8
movq $0x7fffffffffffffff, %r9
movq (%rsi), %r10
movq 8(%rsi), %r11
movq 16(%rsi), %r12
movq 24(%rsi), %r13
leaq L_sp_mod_inv_avx2__prime(%rip), %rbx
vmovupd (%rbx), %ymm6
vmovupd 32(%rbx), %ymm7
leaq L_sp_mod_inv_avx2__one(%rip), %rbx
vmovupd (%rbx), %ymm8
leaq L_sp_mod_inv_avx2__mask01111(%rip), %rbx
vmovupd (%rbx), %ymm9
leaq L_sp_mod_inv_avx2__all_one(%rip), %rbx
vmovupd (%rbx), %ymm10
leaq L_sp_mod_inv_avx2__down_one_dword(%rip), %rbx
vmovupd (%rbx), %ymm11
leaq L_sp_mod_inv_avx2__neg(%rip), %rbx
vmovupd (%rbx), %ymm12
leaq L_sp_mod_inv_avx2__up_one_dword(%rip), %rbx
vmovupd (%rbx), %ymm13
leaq L_sp_mod_inv_avx2__mask26(%rip), %rbx
vmovupd (%rbx), %ymm14
vpxor %xmm0, %xmm0, %xmm0
vpxor %xmm1, %xmm1, %xmm1
vmovdqu %ymm8, %ymm2
vpxor %xmm3, %xmm3, %xmm3
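# Binary extended Euclidean inversion: u (rax:rcx:r8:r9, initialized to
# p = 2^255 - 19) and v (r10:r11:r12:r13, the input) live in GPRs,
# while the corresponding Bezout coefficients are tracked in ymm0:ymm1
# and ymm2:ymm3 as signed 26-bit limbs. Whenever a coefficient must be
# halved but is odd, p (ymm6:ymm7) is added first to make it even.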
testb $0x01, %r10b
jnz L__mod_inv_avx2__v_even_end
L__mod_inv_avx2__v_even_start:
shrdq $0x01, %r11, %r10
shrdq $0x01, %r12, %r11
shrdq $0x01, %r13, %r12
shrq $0x01, %r13
vptest %ymm8, %ymm2
jz L__mod_inv_avx2__v_even_shr1
vpaddd %ymm6, %ymm2, %ymm2
vpaddd %ymm7, %ymm3, %ymm3
L__mod_inv_avx2__v_even_shr1:
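# Halve the limbed coefficient: the low bit of each limb is extracted,
# moved down one limb position (vpermd re-aligns the lanes that cross a
# dword boundary) and re-inserted at bit 25 after every limb is
# arithmetically shifted right by one.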
vpand %ymm9, %ymm2, %ymm4
vpand %ymm10, %ymm3, %ymm5
vpermd %ymm4, %ymm11, %ymm4
vpsrad $0x01, %ymm2, %ymm2
vpsrad $0x01, %ymm3, %ymm3
vpslld $25, %ymm5, %ymm5
vpslld $25, %xmm4, %xmm4
vpaddd %ymm5, %ymm2, %ymm2
vpaddd %ymm4, %ymm3, %ymm3
testb $0x01, %r10b
jz L__mod_inv_avx2__v_even_start
L__mod_inv_avx2__v_even_end:
L__mod_inv_avx2__uv_start:
cmpq %r13, %r9
jb L__mod_inv_avx2__uv_v
ja L__mod_inv_avx2__uv_u
cmpq %r12, %r8
jb L__mod_inv_avx2__uv_v
ja L__mod_inv_avx2__uv_u
cmpq %r11, %rcx
jb L__mod_inv_avx2__uv_v
ja L__mod_inv_avx2__uv_u
cmpq %r10, %rax
jb L__mod_inv_avx2__uv_v
L__mod_inv_avx2__uv_u:
subq %r10, %rax
sbbq %r11, %rcx
vpsubd %ymm2, %ymm0, %ymm0
sbbq %r12, %r8
vpsubd %ymm3, %ymm1, %ymm1
sbbq %r13, %r9
vptest %ymm12, %ymm1
jz L__mod_inv_avx2__usubv_done_neg
vpaddd %ymm6, %ymm0, %ymm0
vpaddd %ymm7, %ymm1, %ymm1
L__mod_inv_avx2__usubv_done_neg:
L__mod_inv_avx2__usubv_shr1:
shrdq $0x01, %rcx, %rax
shrdq $0x01, %r8, %rcx
shrdq $0x01, %r9, %r8
shrq $0x01, %r9
vptest %ymm8, %ymm0
jz L__mod_inv_avx2__usubv_sub_shr1
vpaddd %ymm6, %ymm0, %ymm0
vpaddd %ymm7, %ymm1, %ymm1
L__mod_inv_avx2__usubv_sub_shr1:
vpand %ymm9, %ymm0, %ymm4
vpand %ymm10, %ymm1, %ymm5
vpermd %ymm4, %ymm11, %ymm4
vpsrad $0x01, %ymm0, %ymm0
vpsrad $0x01, %ymm1, %ymm1
vpslld $25, %ymm5, %ymm5
vpslld $25, %xmm4, %xmm4
vpaddd %ymm5, %ymm0, %ymm0
vpaddd %ymm4, %ymm1, %ymm1
testb $0x01, %al
jz L__mod_inv_avx2__usubv_shr1
cmpq $0x01, %rax
jne L__mod_inv_avx2__uv_start
movq %rcx, %rdx
orq %r8, %rdx
jne L__mod_inv_avx2__uv_start
orq %r9, %rdx
jne L__mod_inv_avx2__uv_start
vpextrd $0x00, %xmm0, %eax
vpextrd $0x01, %xmm0, %r8d
vpextrd $2, %xmm0, %r10d
vpextrd $3, %xmm0, %r12d
vpextrd $0x00, %xmm1, %ecx
vpextrd $0x01, %xmm1, %r9d
vpextrd $2, %xmm1, %r11d
vpextrd $3, %xmm1, %r13d
vextracti128 $0x01, %ymm0, %xmm0
vextracti128 $0x01, %ymm1, %xmm1
vpextrd $0x00, %xmm0, %r14d
vpextrd $0x00, %xmm1, %r15d
jmp L__mod_inv_avx2__store_done
L__mod_inv_avx2__uv_v:
subq %rax, %r10
sbbq %rcx, %r11
vpsubd %ymm0, %ymm2, %ymm2
sbbq %r8, %r12
vpsubd %ymm1, %ymm3, %ymm3
sbbq %r9, %r13
vptest %ymm12, %ymm3
jz L__mod_inv_avx2__vsubu_done_neg
vpaddd %ymm6, %ymm2, %ymm2
vpaddd %ymm7, %ymm3, %ymm3
L__mod_inv_avx2__vsubu_done_neg:
L__mod_inv_avx2__vsubu_shr1:
shrdq $0x01, %r11, %r10
shrdq $0x01, %r12, %r11
shrdq $0x01, %r13, %r12
shrq $0x01, %r13
vptest %ymm8, %ymm2
jz L__mod_inv_avx2__vsubu_sub_shr1
vpaddd %ymm6, %ymm2, %ymm2
vpaddd %ymm7, %ymm3, %ymm3
L__mod_inv_avx2__vsubu_sub_shr1:
vpand %ymm9, %ymm2, %ymm4
vpand %ymm10, %ymm3, %ymm5
vpermd %ymm4, %ymm11, %ymm4
vpsrad $0x01, %ymm2, %ymm2
vpsrad $0x01, %ymm3, %ymm3
vpslld $25, %ymm5, %ymm5
vpslld $25, %xmm4, %xmm4
vpaddd %ymm5, %ymm2, %ymm2
vpaddd %ymm4, %ymm3, %ymm3
testb $0x01, %r10b
jz L__mod_inv_avx2__vsubu_shr1
cmpq $0x01, %r10
jne L__mod_inv_avx2__uv_start
movq %r11, %rdx
orq %r12, %rdx
jne L__mod_inv_avx2__uv_start
orq %r13, %rdx
jne L__mod_inv_avx2__uv_start
vpextrd $0x00, %xmm2, %eax
vpextrd $0x01, %xmm2, %r8d
vpextrd $2, %xmm2, %r10d
vpextrd $3, %xmm2, %r12d
vpextrd $0x00, %xmm3, %ecx
vpextrd $0x01, %xmm3, %r9d
vpextrd $2, %xmm3, %r11d
vpextrd $3, %xmm3, %r13d
vextracti128 $0x01, %ymm2, %xmm2
vextracti128 $0x01, %ymm3, %xmm3
vpextrd $0x00, %xmm2, %r14d
vpextrd $0x00, %xmm3, %r15d
L__mod_inv_avx2__store_done:
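# Normalize the ten signed 26-bit limbs with a signed carry chain,
# merge limb pairs into five 52-bit limbs, add p once if the total went
# negative, then pack the 52-bit limbs into four 64-bit words.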
movl %eax, %edx
andl $0x3ffffff, %eax
sarl $26, %edx
addl %edx, %ecx
movl %ecx, %edx
andl $0x3ffffff, %ecx
sarl $26, %edx
addl %edx, %r8d
movl %r8d, %edx
andl $0x3ffffff, %r8d
sarl $26, %edx
addl %edx, %r9d
movl %r9d, %edx
andl $0x3ffffff, %r9d
sarl $26, %edx
addl %edx, %r10d
movl %r10d, %edx
andl $0x3ffffff, %r10d
sarl $26, %edx
addl %edx, %r11d
movl %r11d, %edx
andl $0x3ffffff, %r11d
sarl $26, %edx
addl %edx, %r12d
movl %r12d, %edx
andl $0x3ffffff, %r12d
sarl $26, %edx
addl %edx, %r13d
movl %r13d, %edx
andl $0x3ffffff, %r13d
sarl $26, %edx
addl %edx, %r14d
movl %r14d, %edx
andl $0x3ffffff, %r14d
sarl $26, %edx
addl %edx, %r15d
movslq %ecx, %rcx
movslq %r9d, %r9
movslq %r11d, %r11
movslq %r13d, %r13
movslq %r15d, %r15
shlq $26, %rcx
shlq $26, %r9
shlq $26, %r11
shlq $26, %r13
shlq $26, %r15
movslq %eax, %rax
addq %rcx, %rax
movslq %r8d, %r8
adcq %r9, %r8
movslq %r10d, %r10
adcq %r11, %r10
movslq %r12d, %r12
adcq %r13, %r12
movslq %r14d, %r14
adcq %r15, %r14
jge L__mod_inv_avx2__3_no_add_prime
movq $0xfffffffffffed, %rcx
movq $0xfffffffffffff, %r9
movq $0xfffffffffffff, %r11
movq $0xfffffffffffff, %r13
movq $0x7fffffffffff, %r15
addq %rcx, %rax
addq %r9, %r8
addq %r11, %r10
addq %r13, %r12
addq %r15, %r14
movq $0xfffffffffffff, %rdx
movq %rax, %rcx
andq %rdx, %rax
sarq $52, %rcx
addq %rcx, %r8
movq %r8, %r9
andq %rdx, %r8
sarq $52, %r9
addq %r9, %r10
movq %r10, %r11
andq %rdx, %r10
sarq $52, %r11
addq %r11, %r12
movq %r12, %r13
andq %rdx, %r12
sarq $52, %r13
addq %r13, %r14
L__mod_inv_avx2__3_no_add_prime:
movq %r8, %rcx
movq %r10, %r9
movq %r12, %r11
shlq $52, %rcx
sarq $12, %r8
shlq $40, %r9
sarq $24, %r10
shlq $28, %r11
sarq $36, %r12
shlq $16, %r14
addq %rcx, %rax
adcq %r9, %r8
adcq %r11, %r10
adcq %r14, %r12
movq %rax, (%rdi)
movq %r8, 8(%rdi)
movq %r10, 16(%rdi)
movq %r12, 24(%rdi)
vzeroupper
popq %rbx
popq %r15
popq %r14
popq %r13
popq %r12
repz retq
#ifndef __APPLE__
.size fe_invert_nct_avx2,.-fe_invert_nct_avx2
#endif /* __APPLE__ */
#endif /* HAVE_ED25519 */
#endif /* HAVE_INTEL_AVX2 */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif