; pqc_kyber 0.7.0
;
; A Rust implementation of the post-quantum Kyber KEM algorithm.
; Documentation
%include "fq.inc"
%include "consts.inc"

SECTION .text
;-----------------------------------------------------------------------
; reduce128_avx -- file-local helper (never declared global), entered
; with `call` from reduce_avx.
;
; Works on one 256-byte chunk in place: eight 32-byte vectors are read
; from [rdi] into ymm2..ymm9, every one of those registers is handed to
; the red16 macro (not defined in this file; presumably in fq.inc), and
; the eight registers are written back to the addresses they came from.
;
; In:    rdi        = chunk base address; has to be 32-byte aligned,
;                     because vmovdqa faults on an unaligned ymm operand
;        ymm0, ymm1 = constants loaded by the caller (presumably what
;                     red16 reads -- confirm against fq.inc)
; Out:   [rdi .. rdi+255] overwritten; rdi itself is unchanged
; Clobb: ymm2-ymm9, plus any temporaries red16 uses internally
;-----------------------------------------------------------------------
reduce128_avx:
        ; Fetch the chunk: ymm(2+k) <- [rdi + 32*k], k = 0..7.
%assign red_reg 2
%rep 8
        vmovdqa ymm %+ red_reg, [rdi + 32*(red_reg - 2)]
%assign red_reg red_reg + 1
%endrep

        ; Apply red16 to ymm2, ymm3, ..., ymm9, in that order.
%assign red_reg 2
%rep 8
        red16   red_reg
%assign red_reg red_reg + 1
%endrep

        ; Write the chunk back: [rdi + 32*k] <- ymm(2+k), k = 0..7.
%assign red_reg 2
%rep 8
        vmovdqa [rdi + 32*(red_reg - 2)], ymm %+ red_reg
%assign red_reg red_reg + 1
%endrep
%undef red_reg

        ret

;-----------------------------------------------------------------------
; reduce_avx / _reduce_avx -- exported entry point. Both spellings label
; the same address (the underscore form presumably exists for targets
; that prefix C symbols with '_', e.g. Mach-O -- confirm with the build).
;
; Loads two 32-byte constants from the table at rsi into ymm0/ymm1 and
; then runs reduce128_avx over two consecutive 256-byte chunks starting
; at rdi, i.e. 512 bytes are rewritten in place.
;
; ABI:   register usage matches SysV AMD64 (1st pointer in rdi, 2nd in
;        rsi) -- assumed, not declared anywhere in this file
; In:    rdi = data buffer, 32-byte aligned (vmovdqa in the helper)
;        rsi = constants table; rsi + 2*_16XQ and rsi + 2*_16XV must be
;              32-byte aligned. The "*2" presumably converts an index
;              counted in 16-bit words to bytes -- see consts.inc
; Out:   buffer updated in place; rdi is left pointing 256 bytes past
;        its entry value
; Clobb: rdi, flags, ymm0, ymm1, and everything reduce128_avx clobbers
;-----------------------------------------------------------------------
%define REDUCE_CHUNK_BYTES (8*32)       ; eight ymm vectors per helper call

global reduce_avx
global _reduce_avx
reduce_avx:
_reduce_avx:
        ; constants expected by the helper
        vmovdqa ymm0, [rsi + 2*_16XQ]
        vmovdqa ymm1, [rsi + 2*_16XV]

        call    reduce128_avx           ; first 256-byte chunk
        add     rdi, REDUCE_CHUNK_BYTES
        call    reduce128_avx           ; second 256-byte chunk
        ret

;-----------------------------------------------------------------------
; tomont128_avx -- file-local helper (never declared global), entered
; with `call` from tomont_avx.
;
; Works on one 256-byte chunk in place: eight 32-byte vectors are read
; from [rdi] into ymm3..ymm10, each is passed to the fqmulprecomp macro
; (not defined in this file; presumably in fq.inc), and the registers
; are written back to the addresses they came from.
;
; fqmulprecomp is always invoked as `1, 2, <data reg>, <4th reg>`. The
; first two arguments name ymm1/ymm2, which the caller fills before the
; call. The 4th argument rotates through 11..15 and then restarts at 11;
; it looks like a scratch register -- confirm against fq.inc.
;
; In:    rdi        = chunk base address; has to be 32-byte aligned,
;                     because vmovdqa faults on an unaligned ymm operand
;        ymm0-ymm2  = constants loaded by the caller (ymm0 is not named
;                     here; presumably read inside fqmulprecomp)
; Out:   [rdi .. rdi+255] overwritten; rdi itself is unchanged
; Clobb: ymm3-ymm10, and presumably ymm11-ymm15 via the 4th macro arg
;-----------------------------------------------------------------------
tomont128_avx:
        ; Fetch the chunk: ymm(3+k) <- [rdi + 32*k], k = 0..7.
%assign tm_data 3
%rep 8
        vmovdqa ymm %+ tm_data, [rdi + 32*(tm_data - 3)]
%assign tm_data tm_data + 1
%endrep

        ; Emits, in order: (3,11) (4,12) (5,13) (6,14) (7,15)
        ;                  (8,11) (9,12) (10,13)
%assign tm_data 3
%assign tm_tmp 11
%rep 8
        fqmulprecomp 1, 2, tm_data, tm_tmp
%assign tm_data tm_data + 1
%assign tm_tmp tm_tmp + 1
%if tm_tmp > 15
%assign tm_tmp 11                       ; wrap the 4th argument to 11
%endif
%endrep

        ; Write the chunk back: [rdi + 32*k] <- ymm(3+k), k = 0..7.
%assign tm_data 3
%rep 8
        vmovdqa [rdi + 32*(tm_data - 3)], ymm %+ tm_data
%assign tm_data tm_data + 1
%endrep
%undef tm_data
%undef tm_tmp

        ret

;-----------------------------------------------------------------------
; tomont_avx / _tomont_avx -- exported entry point. Both spellings label
; the same address (the underscore form presumably exists for targets
; that prefix C symbols with '_', e.g. Mach-O -- confirm with the build).
;
; Loads three 32-byte constants from the table at rsi into ymm0..ymm2
; and then runs tomont128_avx over two consecutive 256-byte chunks
; starting at rdi, i.e. 512 bytes are rewritten in place.
;
; ABI:   register usage matches SysV AMD64 (1st pointer in rdi, 2nd in
;        rsi) -- assumed, not declared anywhere in this file
; In:    rdi = data buffer, 32-byte aligned (vmovdqa in the helper)
;        rsi = constants table; rsi + 2*_16XQ, rsi + 2*_16XMONTSQLO and
;              rsi + 2*_16XMONTSQHI must be 32-byte aligned. The "*2"
;              presumably converts an index counted in 16-bit words to
;              bytes -- see consts.inc
; Out:   buffer updated in place; rdi is left pointing 256 bytes past
;        its entry value
; Clobb: rdi, flags, ymm0-ymm2, and everything tomont128_avx clobbers
;-----------------------------------------------------------------------
%define TOMONT_CHUNK_BYTES (8*32)       ; eight ymm vectors per helper call

global tomont_avx
global _tomont_avx
tomont_avx:
_tomont_avx:
        ; constants expected by the helper
        vmovdqa ymm0, [rsi + 2*_16XQ]
        vmovdqa ymm1, [rsi + 2*_16XMONTSQLO]
        vmovdqa ymm2, [rsi + 2*_16XMONTSQHI]

        call    tomont128_avx           ; first 256-byte chunk
        add     rdi, TOMONT_CHUNK_BYTES
        call    tomont128_avx           ; second 256-byte chunk
        ret