// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text	
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

chacha20_poly1305_constants:

.section	.rodata
.align	64
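// "expand 32-byte k": the ChaCha20 sigma constant, duplicated to fill a
// 32-byte vector.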
.Lchacha20_consts:
.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
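// pshufb mask rotating each 32-bit lane left by 8 bits.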
.Lrol8:
.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
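// pshufb mask rotating each 32-bit lane left by 16 bits.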
.Lrol16:
.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
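// Block-counter constants: .Lsse_inc and .Lavx2_inc are the per-iteration
// counter increments for the SSE and AVX2 paths (one and two blocks per
// 128-bit lane respectively); .Lavx2_init is an all-zero value used when
// setting up the AVX2 counters.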
.Lavx2_init:
.long	0,0,0,0
.Lsse_inc:
.long	1,0,0,0
.Lavx2_inc:
.long	2,0,0,0,2,0,0,0
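// Poly1305 clamp: the first 16 bytes mask the evaluation key r
// (r &= 0x0ffffffc0ffffffc0ffffffc0fffffff); the second 16 bytes are all ones.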
.Lclamp:
.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align	16
.Land_masks:
.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
.text	

.type	poly_hash_ad_internal,@function
.align	64
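// poly_hash_ad_internal: absorb the additional data into the Poly1305
// accumulator held in r10:r11:r12. On entry rcx points at the AD, r8 holds
// its length, and the clamped key r is cached at 0(%rbp)/8(%rbp). A 13-byte
// AD (the TLS record case) takes the single-block shortcut below.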
poly_hash_ad_internal:
.cfi_startproc	
.cfi_def_cfa	rsp, 8
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r12,%r12
	cmpq	$13,%r8
	jne	.Lhash_ad_loop
.Lpoly_fast_tls_ad:

	movq	(%rcx),%r10
	movq	5(%rcx),%r11
	shrq	$24,%r11
	movq	$1,%r12
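// Multiply the accumulator (r10:r11:r12) by r and reduce mod 2^130-5; this
// same block is repeated after every 16-byte absorption in this file.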
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	ret
.Lhash_ad_loop:

	cmpq	$16,%r8
	jb	.Lhash_ad_tail
	addq	0+0(%rcx),%r10
	adcq	8+0(%rcx),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rcx),%rcx
	subq	$16,%r8
	jmp	.Lhash_ad_loop
.Lhash_ad_tail:
	cmpq	$0,%r8
	je	.Lhash_ad_done

	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	addq	%r8,%rcx
.Lhash_ad_tail_loop:
	shldq	$8,%r13,%r14
	shlq	$8,%r13
	movzbq	-1(%rcx),%r15
	xorq	%r15,%r13
	decq	%rcx
	decq	%r8
	jne	.Lhash_ad_tail_loop

	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


.Lhash_ad_done:
	ret
.cfi_endproc	
.size	poly_hash_ad_internal, .-poly_hash_ad_internal

.globl	chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type	chacha20_poly1305_open,@function
.align	64
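// chacha20_poly1305_open: AEAD open (decrypt and authenticate). As used
// below: rdi = plaintext out, rsi = ciphertext in, rdx = ciphertext length,
// rcx = additional data, r8 = AD length, r9 = key/counter/nonce block,
// through which the computed Poly1305 tag is written back before returning.
// The 32-byte-aligned scratch area addressed via rbp caches the Poly1305
// key (offsets 0/16), the AD and ciphertext lengths (32/40), and the
// ChaCha20 state and counters (48 and up).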
chacha20_poly1305_open:
.cfi_startproc	
_CET_ENDBR
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56


	pushq	%r9
.cfi_adjust_cfa_offset	8
.cfi_offset	%r9,-64
	subq	$288 + 0 + 32,%rsp
.cfi_adjust_cfa_offset	288 + 32

	leaq	32(%rsp),%rbp
	andq	$-32,%rbp

	movq	%rdx,%rbx
	movq	%r8,0+0+32(%rbp)
	movq	%rbx,8+0+32(%rbp)
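// The SSE code that follows is used unless OPENSSL_ia32cap_P word 2 has both
// the AVX2 (0x20) and BMI2 (0x100) feature bits set (288 == 0x120), in which
// case control transfers to chacha20_poly1305_open_avx2.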

	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$288,%eax
	xorl	$288,%eax
	jz	chacha20_poly1305_open_avx2

	cmpq	$128,%rbx
	jbe	.Lopen_sse_128

	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqu	0(%r9),%xmm4
	movdqu	16(%r9),%xmm8
	movdqu	32(%r9),%xmm12

	movdqa	%xmm12,%xmm7

	movdqa	%xmm4,0+48(%rbp)
	movdqa	%xmm8,0+64(%rbp)
	movdqa	%xmm12,0+96(%rbp)
	movq	$10,%r10
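// Run one ChaCha20 block with the unincremented counter: its first 32 bytes,
// clamped, become the Poly1305 key; the data blocks start at counter + 1.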
.Lopen_sse_init_rounds:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%r10
	jne	.Lopen_sse_init_rounds

	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4

	pand	.Lclamp(%rip),%xmm0
	movdqa	%xmm0,0+0(%rbp)
	movdqa	%xmm4,0+16(%rbp)

	movq	%r8,%r8
	call	poly_hash_ad_internal
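// Bulk loop: while at least 256 bytes remain, run four ChaCha20 blocks in
// parallel (state rows spread over xmm0-xmm15, one row spilled to 80(%rbp)),
// Poly1305-hash the 256 ciphertext bytes between the vector instructions,
// then XOR the key stream over the input.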
.Lopen_sse_main_loop:
	cmpq	$256,%rbx
	jb	.Lopen_sse_tail

	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	0+96(%rbp),%xmm15
	paddd	.Lsse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.Lsse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)
	movdqa	%xmm14,0+128(%rbp)
	movdqa	%xmm15,0+144(%rbp)



	movq	$4,%rcx
	movq	%rsi,%r8
.Lopen_sse_main_loop_rounds:
	movdqa	%xmm8,0+80(%rbp)
	movdqa	.Lrol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12

	leaq	16(%r8),%r8
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	.Lrol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	0+80(%rbp),%xmm8
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	movdqa	%xmm8,0+80(%rbp)
	movdqa	.Lrol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	.Lrol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	0+80(%rbp),%xmm8
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%rcx
	jge	.Lopen_sse_main_loop_rounds
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
	cmpq	$-6,%rcx
	jg	.Lopen_sse_main_loop_rounds
	paddd	.Lchacha20_consts(%rip),%xmm3
	paddd	0+48(%rbp),%xmm7
	paddd	0+64(%rbp),%xmm11
	paddd	0+144(%rbp),%xmm15
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	0+48(%rbp),%xmm6
	paddd	0+64(%rbp),%xmm10
	paddd	0+128(%rbp),%xmm14
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12
	movdqa	%xmm12,0+80(%rbp)
	movdqu	0 + 0(%rsi),%xmm12
	pxor	%xmm3,%xmm12
	movdqu	%xmm12,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm12
	pxor	%xmm7,%xmm12
	movdqu	%xmm12,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm12
	pxor	%xmm11,%xmm12
	movdqu	%xmm12,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm12
	pxor	%xmm15,%xmm12
	movdqu	%xmm12,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)
	movdqu	0 + 192(%rsi),%xmm3
	movdqu	16 + 192(%rsi),%xmm7
	movdqu	32 + 192(%rsi),%xmm11
	movdqu	48 + 192(%rsi),%xmm15
	pxor	%xmm3,%xmm0
	pxor	%xmm7,%xmm4
	pxor	%xmm11,%xmm8
	pxor	0+80(%rbp),%xmm15
	movdqu	%xmm0,0 + 192(%rdi)
	movdqu	%xmm4,16 + 192(%rdi)
	movdqu	%xmm8,32 + 192(%rdi)
	movdqu	%xmm15,48 + 192(%rdi)

	leaq	256(%rsi),%rsi
	leaq	256(%rdi),%rdi
	subq	$256,%rbx
	jmp	.Lopen_sse_main_loop
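// Fewer than 256 bytes remain: pick a one-, two-, three- or four-block
// variant based on the remaining length, still hashing whole 16-byte
// ciphertext blocks while the rounds are computed.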
.Lopen_sse_tail:

	testq	%rbx,%rbx
	jz	.Lopen_sse_finalize
	cmpq	$192,%rbx
	ja	.Lopen_sse_tail_256
	cmpq	$128,%rbx
	ja	.Lopen_sse_tail_192
	cmpq	$64,%rbx
	ja	.Lopen_sse_tail_128
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	0+96(%rbp),%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)

	xorq	%r8,%r8
	movq	%rbx,%rcx
	cmpq	$16,%rcx
	jb	.Lopen_sse_tail_64_rounds
.Lopen_sse_tail_64_rounds_and_x1hash:
	addq	0+0(%rsi,%r8,1),%r10
	adcq	8+0(%rsi,%r8,1),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
.Lopen_sse_tail_64_rounds:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	cmpq	$16,%rcx
	jae	.Lopen_sse_tail_64_rounds_and_x1hash
	cmpq	$160,%r8
	jne	.Lopen_sse_tail_64_rounds
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12

	jmp	.Lopen_sse_tail_64_dec_loop

.Lopen_sse_tail_128:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	0+96(%rbp),%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)

	movq	%rbx,%rcx
	andq	$-16,%rcx
	xorq	%r8,%r8
.Lopen_sse_tail_128_rounds_and_x1hash:
	addq	0+0(%rsi,%r8,1),%r10
	adcq	8+0(%rsi,%r8,1),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

.Lopen_sse_tail_128_rounds:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4

	cmpq	%rcx,%r8
	jb	.Lopen_sse_tail_128_rounds_and_x1hash
	cmpq	$160,%r8
	jne	.Lopen_sse_tail_128_rounds
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 0(%rdi)
	movdqu	%xmm5,16 + 0(%rdi)
	movdqu	%xmm9,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)

	subq	$64,%rbx
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi
	jmp	.Lopen_sse_tail_64_dec_loop

.Lopen_sse_tail_192:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	0+96(%rbp),%xmm14
	paddd	.Lsse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)
	movdqa	%xmm14,0+128(%rbp)

	movq	%rbx,%rcx
	movq	$160,%r8
	cmpq	$160,%rcx
	cmovgq	%r8,%rcx
	andq	$-16,%rcx
	xorq	%r8,%r8
.Lopen_sse_tail_192_rounds_and_x1hash:
	addq	0+0(%rsi,%r8,1),%r10
	adcq	8+0(%rsi,%r8,1),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

.Lopen_sse_tail_192_rounds:
	addq	$16,%r8
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4

	cmpq	%rcx,%r8
	jb	.Lopen_sse_tail_192_rounds_and_x1hash
	cmpq	$160,%r8
	jne	.Lopen_sse_tail_192_rounds
	cmpq	$176,%rbx
	jb	.Lopen_sse_tail_192_finish
	addq	0+160(%rsi),%r10
	adcq	8+160(%rsi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	cmpq	$192,%rbx
	jb	.Lopen_sse_tail_192_finish
	addq	0+176(%rsi),%r10
	adcq	8+176(%rsi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

.Lopen_sse_tail_192_finish:
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	0+48(%rbp),%xmm6
	paddd	0+64(%rbp),%xmm10
	paddd	0+128(%rbp),%xmm14
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 0(%rdi)
	movdqu	%xmm6,16 + 0(%rdi)
	movdqu	%xmm10,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 64(%rdi)
	movdqu	%xmm5,16 + 64(%rdi)
	movdqu	%xmm9,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)

	subq	$128,%rbx
	leaq	128(%rsi),%rsi
	leaq	128(%rdi),%rdi
	jmp	.Lopen_sse_tail_64_dec_loop

.Lopen_sse_tail_256:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	0+96(%rbp),%xmm15
	paddd	.Lsse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.Lsse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)
	movdqa	%xmm14,0+128(%rbp)
	movdqa	%xmm15,0+144(%rbp)

	xorq	%r8,%r8
.Lopen_sse_tail_256_rounds_and_x1hash:
	addq	0+0(%rsi,%r8,1),%r10
	adcq	8+0(%rsi,%r8,1),%r11
	adcq	$1,%r12
	movdqa	%xmm11,0+80(%rbp)
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm4
	pxor	%xmm11,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm4
	pxor	%xmm11,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm5
	pxor	%xmm11,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm5
	pxor	%xmm11,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm6
	pxor	%xmm11,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm6
	pxor	%xmm11,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	movdqa	0+80(%rbp),%xmm11
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	%xmm9,0+80(%rbp)
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.Lrol16(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$12,%xmm9
	psrld	$20,%xmm7
	pxor	%xmm9,%xmm7
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.Lrol8(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$7,%xmm9
	psrld	$25,%xmm7
	pxor	%xmm9,%xmm7
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
	movdqa	0+80(%rbp),%xmm9
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	movdqa	%xmm11,0+80(%rbp)
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm4
	pxor	%xmm11,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm4
	pxor	%xmm11,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm5
	pxor	%xmm11,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm5
	pxor	%xmm11,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$12,%xmm11
	psrld	$20,%xmm6
	pxor	%xmm11,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm11
	pslld	$7,%xmm11
	psrld	$25,%xmm6
	pxor	%xmm11,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
	movdqa	0+80(%rbp),%xmm11
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	movdqa	%xmm9,0+80(%rbp)
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.Lrol16(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$12,%xmm9
	psrld	$20,%xmm7
	pxor	%xmm9,%xmm7
	paddd	%xmm7,%xmm3
	pxor	%xmm3,%xmm15
	pshufb	.Lrol8(%rip),%xmm15
	paddd	%xmm15,%xmm11
	pxor	%xmm11,%xmm7
	movdqa	%xmm7,%xmm9
	pslld	$7,%xmm9
	psrld	$25,%xmm7
	pxor	%xmm9,%xmm7
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
	movdqa	0+80(%rbp),%xmm9

	addq	$16,%r8
	cmpq	$160,%r8
	jb	.Lopen_sse_tail_256_rounds_and_x1hash

	movq	%rbx,%rcx
	andq	$-16,%rcx
.Lopen_sse_tail_256_hash:
	addq	0+0(%rsi,%r8,1),%r10
	adcq	8+0(%rsi,%r8,1),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	addq	$16,%r8
	cmpq	%rcx,%r8
	jb	.Lopen_sse_tail_256_hash
	paddd	.Lchacha20_consts(%rip),%xmm3
	paddd	0+48(%rbp),%xmm7
	paddd	0+64(%rbp),%xmm11
	paddd	0+144(%rbp),%xmm15
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	0+48(%rbp),%xmm6
	paddd	0+64(%rbp),%xmm10
	paddd	0+128(%rbp),%xmm14
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12
	movdqa	%xmm12,0+80(%rbp)
	movdqu	0 + 0(%rsi),%xmm12
	pxor	%xmm3,%xmm12
	movdqu	%xmm12,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm12
	pxor	%xmm7,%xmm12
	movdqu	%xmm12,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm12
	pxor	%xmm11,%xmm12
	movdqu	%xmm12,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm12
	pxor	%xmm15,%xmm12
	movdqu	%xmm12,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)

	movdqa	0+80(%rbp),%xmm12
	subq	$192,%rbx
	leaq	192(%rsi),%rsi
	leaq	192(%rdi),%rdi
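// The loop below XORs any remaining whole 16-byte blocks with the leftover
// key stream (those bytes were already absorbed by Poly1305 above),
// rotating xmm4/xmm8/xmm12 into xmm0 as each block is consumed.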


.Lopen_sse_tail_64_dec_loop:
	cmpq	$16,%rbx
	jb	.Lopen_sse_tail_16_init
	subq	$16,%rbx
	movdqu	(%rsi),%xmm3
	pxor	%xmm3,%xmm0
	movdqu	%xmm0,(%rdi)
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	movdqa	%xmm4,%xmm0
	movdqa	%xmm8,%xmm4
	movdqa	%xmm12,%xmm8
	jmp	.Lopen_sse_tail_64_dec_loop
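// Last 0-15 bytes: xmm1 keeps the remaining key stream; the tail bytes are
// gathered into xmm3, the zero-padded ciphertext block is hashed, and the
// plaintext is produced and stored byte by byte.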
.Lopen_sse_tail_16_init:
	movdqa	%xmm0,%xmm1


.Lopen_sse_tail_16:
	testq	%rbx,%rbx
	jz	.Lopen_sse_finalize



	pxor	%xmm3,%xmm3
	leaq	-1(%rsi,%rbx,1),%rsi
	movq	%rbx,%r8
.Lopen_sse_tail_16_compose:
	pslldq	$1,%xmm3
	pinsrb	$0,(%rsi),%xmm3
	subq	$1,%rsi
	subq	$1,%r8
	jnz	.Lopen_sse_tail_16_compose

.byte	102,73,15,126,221
	pextrq	$1,%xmm3,%r14

	pxor	%xmm1,%xmm3


.Lopen_sse_tail_16_extract:
	pextrb	$0,%xmm3,(%rdi)
	psrldq	$1,%xmm3
	addq	$1,%rdi
	subq	$1,%rbx
	jne	.Lopen_sse_tail_16_extract

	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
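// Finalize: absorb the length block (AD length and ciphertext length saved
// at 32(%rbp)/40(%rbp)), do the final reduction modulo 2^130-5, add the
// second half of the one-time key (s, at 16(%rbp)) and return the 16-byte
// tag through the pointer saved from r9.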


.Lopen_sse_finalize:
	addq	0+0+32(%rbp),%r10
	adcq	8+0+32(%rbp),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


	movq	%r10,%r13
	movq	%r11,%r14
	movq	%r12,%r15
	subq	$-5,%r10
	sbbq	$-1,%r11
	sbbq	$3,%r12
	cmovcq	%r13,%r10
	cmovcq	%r14,%r11
	cmovcq	%r15,%r12

	addq	0+0+16(%rbp),%r10
	adcq	8+0+16(%rbp),%r11

.cfi_remember_state	
	addq	$288 + 0 + 32,%rsp
.cfi_adjust_cfa_offset	-(288 + 32)

	popq	%r9
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r9
	movq	%r10,(%r9)
	movq	%r11,8(%r9)
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	ret
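// Short-input path (at most 128 bytes): run three ChaCha20 blocks in
// parallel; the block with the unincremented counter is clamped into the
// Poly1305 key and the other two supply key stream, with hashing and
// decryption done 16 bytes at a time.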

.Lopen_sse_128:
.cfi_restore_state	
	movdqu	.Lchacha20_consts(%rip),%xmm0
	movdqa	%xmm0,%xmm1
	movdqa	%xmm0,%xmm2
	movdqu	0(%r9),%xmm4
	movdqa	%xmm4,%xmm5
	movdqa	%xmm4,%xmm6
	movdqu	16(%r9),%xmm8
	movdqa	%xmm8,%xmm9
	movdqa	%xmm8,%xmm10
	movdqu	32(%r9),%xmm12
	movdqa	%xmm12,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm14
	paddd	.Lsse_inc(%rip),%xmm14
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	%xmm13,%xmm15
	movq	$10,%r10

.Lopen_sse_128_rounds:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4

	decq	%r10
	jnz	.Lopen_sse_128_rounds
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	%xmm7,%xmm4
	paddd	%xmm7,%xmm5
	paddd	%xmm7,%xmm6
	paddd	%xmm11,%xmm9
	paddd	%xmm11,%xmm10
	paddd	%xmm15,%xmm13
	paddd	.Lsse_inc(%rip),%xmm15
	paddd	%xmm15,%xmm14

	pand	.Lclamp(%rip),%xmm0
	movdqa	%xmm0,0+0(%rbp)
	movdqa	%xmm4,0+16(%rbp)

	movq	%r8,%r8
	call	poly_hash_ad_internal
.Lopen_sse_128_xor_hash:
	cmpq	$16,%rbx
	jb	.Lopen_sse_tail_16
	subq	$16,%rbx
	addq	0+0(%rsi),%r10
	adcq	8+0(%rsi),%r11
	adcq	$1,%r12


	movdqu	0(%rsi),%xmm3
	pxor	%xmm3,%xmm1
	movdqu	%xmm1,0(%rdi)
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


	movdqa	%xmm5,%xmm1
	movdqa	%xmm9,%xmm5
	movdqa	%xmm13,%xmm9
	movdqa	%xmm2,%xmm13
	movdqa	%xmm6,%xmm2
	movdqa	%xmm10,%xmm6
	movdqa	%xmm14,%xmm10
	jmp	.Lopen_sse_128_xor_hash
.size	chacha20_poly1305_open, .-chacha20_poly1305_open
.cfi_endproc	







.globl	chacha20_poly1305_seal
.hidden chacha20_poly1305_seal
.type	chacha20_poly1305_seal,@function
.align	64
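// chacha20_poly1305_seal: AEAD seal (encrypt and authenticate). Register
// use mirrors chacha20_poly1305_open: rdi = ciphertext out, rsi = plaintext
// in, rdx = plaintext length, rcx = additional data, r8 = AD length,
// r9 = key/counter/nonce block that also receives the tag. The ciphertext
// length recorded for the final Poly1305 length block is rdx plus the value
// stored at 56(%r9).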
chacha20_poly1305_seal:
.cfi_startproc	
_CET_ENDBR
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56


	pushq	%r9
.cfi_adjust_cfa_offset	8
.cfi_offset	%r9,-64
	subq	$288 + 0 + 32,%rsp
.cfi_adjust_cfa_offset	288 + 32
	leaq	32(%rsp),%rbp
	andq	$-32,%rbp

	movq	56(%r9),%rbx
	addq	%rdx,%rbx
	movq	%r8,0+0+32(%rbp)
	movq	%rbx,8+0+32(%rbp)
	movq	%rdx,%rbx

	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$288,%eax
	xorl	$288,%eax
	jz	chacha20_poly1305_seal_avx2

	cmpq	$128,%rbx
	jbe	.Lseal_sse_128

	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqu	0(%r9),%xmm4
	movdqu	16(%r9),%xmm8
	movdqu	32(%r9),%xmm12

	movdqa	%xmm0,%xmm1
	movdqa	%xmm0,%xmm2
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm5
	movdqa	%xmm4,%xmm6
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm9
	movdqa	%xmm8,%xmm10
	movdqa	%xmm8,%xmm11
	movdqa	%xmm12,%xmm15
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,%xmm14
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,%xmm13
	paddd	.Lsse_inc(%rip),%xmm12

	movdqa	%xmm4,0+48(%rbp)
	movdqa	%xmm8,0+64(%rbp)
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)
	movdqa	%xmm14,0+128(%rbp)
	movdqa	%xmm15,0+144(%rbp)
	movq	$10,%r10
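// Compute the first four ChaCha20 blocks up front; the block with the
// unincremented counter supplies the Poly1305 key and the other three
// encrypt the first 192 bytes.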
.Lseal_sse_init_rounds:
	movdqa	%xmm8,0+80(%rbp)
	movdqa	.Lrol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	.Lrol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	0+80(%rbp),%xmm8
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	movdqa	%xmm8,0+80(%rbp)
	movdqa	.Lrol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	.Lrol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	0+80(%rbp),%xmm8
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	decq	%r10
	jnz	.Lseal_sse_init_rounds
	paddd	.Lchacha20_consts(%rip),%xmm3
	paddd	0+48(%rbp),%xmm7
	paddd	0+64(%rbp),%xmm11
	paddd	0+144(%rbp),%xmm15
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	0+48(%rbp),%xmm6
	paddd	0+64(%rbp),%xmm10
	paddd	0+128(%rbp),%xmm14
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12


	pand	.Lclamp(%rip),%xmm3
	movdqa	%xmm3,0+0(%rbp)
	movdqa	%xmm7,0+16(%rbp)

	movq	%r8,%r8
	call	poly_hash_ad_internal
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 0(%rdi)
	movdqu	%xmm6,16 + 0(%rdi)
	movdqu	%xmm10,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 64(%rdi)
	movdqu	%xmm5,16 + 64(%rdi)
	movdqu	%xmm9,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)

	cmpq	$192,%rbx
	ja	.Lseal_sse_main_init
	movq	$128,%rcx
	subq	$128,%rbx
	leaq	128(%rsi),%rsi
	jmp	.Lseal_sse_128_tail_hash
.Lseal_sse_main_init:
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm0
	pxor	%xmm7,%xmm4
	pxor	%xmm11,%xmm8
	pxor	%xmm12,%xmm15
	movdqu	%xmm0,0 + 128(%rdi)
	movdqu	%xmm4,16 + 128(%rdi)
	movdqu	%xmm8,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)

	movq	$192,%rcx
	subq	$192,%rbx
	leaq	192(%rsi),%rsi
	movq	$2,%rcx
	movq	$8,%r8
	cmpq	$64,%rbx
	jbe	.Lseal_sse_tail_64
	cmpq	$128,%rbx
	jbe	.Lseal_sse_tail_128
	cmpq	$192,%rbx
	jbe	.Lseal_sse_tail_192
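// Bulk loop: encrypt 256 bytes per iteration with four parallel ChaCha20
// blocks while Poly1305-hashing the ciphertext already written to the
// output buffer (the hash reads 16 bytes from rdi per round iteration).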

.Lseal_sse_main_loop:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	%xmm0,%xmm3
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	0+96(%rbp),%xmm15
	paddd	.Lsse_inc(%rip),%xmm15
	movdqa	%xmm15,%xmm14
	paddd	.Lsse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)
	movdqa	%xmm14,0+128(%rbp)
	movdqa	%xmm15,0+144(%rbp)

.align	32
.Lseal_sse_main_rounds:
	movdqa	%xmm8,0+80(%rbp)
	movdqa	.Lrol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movdqa	.Lrol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	0+80(%rbp),%xmm8
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
.byte	102,15,58,15,255,4
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,12
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	movdqa	%xmm8,0+80(%rbp)
	movdqa	.Lrol16(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$20,%xmm8
	pslld	$32-20,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	.Lrol8(%rip),%xmm8
	paddd	%xmm7,%xmm3
	paddd	%xmm6,%xmm2
	paddd	%xmm5,%xmm1
	paddd	%xmm4,%xmm0
	pxor	%xmm3,%xmm15
	pxor	%xmm2,%xmm14
	pxor	%xmm1,%xmm13
	pxor	%xmm0,%xmm12
.byte	102,69,15,56,0,248
.byte	102,69,15,56,0,240
.byte	102,69,15,56,0,232
.byte	102,69,15,56,0,224
	movdqa	0+80(%rbp),%xmm8
	paddd	%xmm15,%xmm11
	paddd	%xmm14,%xmm10
	paddd	%xmm13,%xmm9
	paddd	%xmm12,%xmm8
	pxor	%xmm11,%xmm7
	pxor	%xmm10,%xmm6
	pxor	%xmm9,%xmm5
	pxor	%xmm8,%xmm4
	movdqa	%xmm8,0+80(%rbp)
	movdqa	%xmm7,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm7
	pxor	%xmm8,%xmm7
	movdqa	%xmm6,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm6
	pxor	%xmm8,%xmm6
	movdqa	%xmm5,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm5
	pxor	%xmm8,%xmm5
	movdqa	%xmm4,%xmm8
	psrld	$25,%xmm8
	pslld	$32-25,%xmm4
	pxor	%xmm8,%xmm4
	movdqa	0+80(%rbp),%xmm8
.byte	102,15,58,15,255,12
.byte	102,69,15,58,15,219,8
.byte	102,69,15,58,15,255,4
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4

	leaq	16(%rdi),%rdi
	decq	%r8
	jge	.Lseal_sse_main_rounds
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_sse_main_rounds
	paddd	.Lchacha20_consts(%rip),%xmm3
	paddd	0+48(%rbp),%xmm7
	paddd	0+64(%rbp),%xmm11
	paddd	0+144(%rbp),%xmm15
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	0+48(%rbp),%xmm6
	paddd	0+64(%rbp),%xmm10
	paddd	0+128(%rbp),%xmm14
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12

	movdqa	%xmm14,0+80(%rbp)
	movdqa	%xmm14,0+80(%rbp)
	movdqu	0 + 0(%rsi),%xmm14
	pxor	%xmm3,%xmm14
	movdqu	%xmm14,0 + 0(%rdi)
	movdqu	16 + 0(%rsi),%xmm14
	pxor	%xmm7,%xmm14
	movdqu	%xmm14,16 + 0(%rdi)
	movdqu	32 + 0(%rsi),%xmm14
	pxor	%xmm11,%xmm14
	movdqu	%xmm14,32 + 0(%rdi)
	movdqu	48 + 0(%rsi),%xmm14
	pxor	%xmm15,%xmm14
	movdqu	%xmm14,48 + 0(%rdi)

	movdqa	0+80(%rbp),%xmm14
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 64(%rdi)
	movdqu	%xmm6,16 + 64(%rdi)
	movdqu	%xmm10,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)
	movdqu	0 + 128(%rsi),%xmm3
	movdqu	16 + 128(%rsi),%xmm7
	movdqu	32 + 128(%rsi),%xmm11
	movdqu	48 + 128(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 128(%rdi)
	movdqu	%xmm5,16 + 128(%rdi)
	movdqu	%xmm9,32 + 128(%rdi)
	movdqu	%xmm15,48 + 128(%rdi)

	cmpq	$256,%rbx
	ja	.Lseal_sse_main_loop_xor

	movq	$192,%rcx
	subq	$192,%rbx
	leaq	192(%rsi),%rsi
	jmp	.Lseal_sse_128_tail_hash
.Lseal_sse_main_loop_xor:
	movdqu	0 + 192(%rsi),%xmm3
	movdqu	16 + 192(%rsi),%xmm7
	movdqu	32 + 192(%rsi),%xmm11
	movdqu	48 + 192(%rsi),%xmm15
	pxor	%xmm3,%xmm0
	pxor	%xmm7,%xmm4
	pxor	%xmm11,%xmm8
	pxor	%xmm12,%xmm15
	movdqu	%xmm0,0 + 192(%rdi)
	movdqu	%xmm4,16 + 192(%rdi)
	movdqu	%xmm8,32 + 192(%rdi)
	movdqu	%xmm15,48 + 192(%rdi)

	leaq	256(%rsi),%rsi
	subq	$256,%rbx
	movq	$6,%rcx
	movq	$4,%r8
	cmpq	$192,%rbx
	jg	.Lseal_sse_main_loop
	movq	%rbx,%rcx
	testq	%rbx,%rbx
	je	.Lseal_sse_128_tail_hash
	movq	$6,%rcx
	cmpq	$128,%rbx
	ja	.Lseal_sse_tail_192
	cmpq	$64,%rbx
	ja	.Lseal_sse_tail_128

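// Seal tail, at most 64 bytes left: run one more ChaCha20 block (state
// reloaded from 48/64/96(%rbp), counter bumped by .Lsse_inc) while
// Poly1305-hashing ciphertext already written at (%rdi); the keystream
// ends up in %xmm0/%xmm4/%xmm8/%xmm12 for the tail XOR code.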
.Lseal_sse_tail_64:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	0+96(%rbp),%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)

.Lseal_sse_tail_64_rounds_and_x2hash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
.Lseal_sse_tail_64_rounds_and_x1hash:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_sse_tail_64_rounds_and_x2hash
	decq	%r8
	jge	.Lseal_sse_tail_64_rounds_and_x1hash
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12

	jmp	.Lseal_sse_128_tail_xor

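// Seal tail, 65-128 bytes left: two ChaCha20 blocks. The first 64 bytes
// are encrypted here; the rest is finished by the common tail code, which
// still has the second keystream block in %xmm0/%xmm4/%xmm8/%xmm12.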
.Lseal_sse_tail_128:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	0+96(%rbp),%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)

.Lseal_sse_tail_128_rounds_and_x2hash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
.Lseal_sse_tail_128_rounds_and_x1hash:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4

	leaq	16(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_sse_tail_128_rounds_and_x2hash
	decq	%r8
	jge	.Lseal_sse_tail_128_rounds_and_x1hash
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 0(%rdi)
	movdqu	%xmm5,16 + 0(%rdi)
	movdqu	%xmm9,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)

	movq	$64,%rcx
	subq	$64,%rbx
	leaq	64(%rsi),%rsi
	jmp	.Lseal_sse_128_tail_hash

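// Seal tail, 129-192 bytes left: three ChaCha20 blocks. 128 bytes are
// encrypted here; the last keystream block stays in %xmm0/%xmm4/%xmm8/%xmm12
// for the common tail code.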
.Lseal_sse_tail_192:
	movdqa	.Lchacha20_consts(%rip),%xmm0
	movdqa	0+48(%rbp),%xmm4
	movdqa	0+64(%rbp),%xmm8
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm5
	movdqa	%xmm8,%xmm9
	movdqa	%xmm0,%xmm2
	movdqa	%xmm4,%xmm6
	movdqa	%xmm8,%xmm10
	movdqa	0+96(%rbp),%xmm14
	paddd	.Lsse_inc(%rip),%xmm14
	movdqa	%xmm14,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm13,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,0+96(%rbp)
	movdqa	%xmm13,0+112(%rbp)
	movdqa	%xmm14,0+128(%rbp)

.Lseal_sse_tail_192_rounds_and_x2hash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
.Lseal_sse_tail_192_rounds_and_x1hash:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4

	leaq	16(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_sse_tail_192_rounds_and_x2hash
	decq	%r8
	jge	.Lseal_sse_tail_192_rounds_and_x1hash
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	0+48(%rbp),%xmm6
	paddd	0+64(%rbp),%xmm10
	paddd	0+128(%rbp),%xmm14
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	0+48(%rbp),%xmm5
	paddd	0+64(%rbp),%xmm9
	paddd	0+112(%rbp),%xmm13
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	0+48(%rbp),%xmm4
	paddd	0+64(%rbp),%xmm8
	paddd	0+96(%rbp),%xmm12
	movdqu	0 + 0(%rsi),%xmm3
	movdqu	16 + 0(%rsi),%xmm7
	movdqu	32 + 0(%rsi),%xmm11
	movdqu	48 + 0(%rsi),%xmm15
	pxor	%xmm3,%xmm2
	pxor	%xmm7,%xmm6
	pxor	%xmm11,%xmm10
	pxor	%xmm14,%xmm15
	movdqu	%xmm2,0 + 0(%rdi)
	movdqu	%xmm6,16 + 0(%rdi)
	movdqu	%xmm10,32 + 0(%rdi)
	movdqu	%xmm15,48 + 0(%rdi)
	movdqu	0 + 64(%rsi),%xmm3
	movdqu	16 + 64(%rsi),%xmm7
	movdqu	32 + 64(%rsi),%xmm11
	movdqu	48 + 64(%rsi),%xmm15
	pxor	%xmm3,%xmm1
	pxor	%xmm7,%xmm5
	pxor	%xmm11,%xmm9
	pxor	%xmm13,%xmm15
	movdqu	%xmm1,0 + 64(%rdi)
	movdqu	%xmm5,16 + 64(%rdi)
	movdqu	%xmm9,32 + 64(%rdi)
	movdqu	%xmm15,48 + 64(%rdi)

	movq	$128,%rcx
	subq	$128,%rbx
	leaq	128(%rsi),%rsi

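// Poly1305-hash the %rcx bytes of whole ciphertext blocks just written,
// 16 bytes at a time.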
.Lseal_sse_128_tail_hash:
	cmpq	$16,%rcx
	jb	.Lseal_sse_128_tail_xor
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
	leaq	16(%rdi),%rdi
	jmp	.Lseal_sse_128_tail_hash

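// Encrypt any remaining whole 16-byte blocks with the keystream held in
// %xmm0/%xmm4/%xmm8/%xmm12 (spare keystream rotates in from
// %xmm1/%xmm5/%xmm9/%xmm13), hashing each ciphertext block as it is
// written; fewer than 16 bytes fall through to the partial-block code.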
.Lseal_sse_128_tail_xor:
	cmpq	$16,%rbx
	jb	.Lseal_sse_tail_16
	subq	$16,%rbx

	movdqu	0(%rsi),%xmm3
	pxor	%xmm3,%xmm0
	movdqu	%xmm0,0(%rdi)

	addq	0(%rdi),%r10
	adcq	8(%rdi),%r11
	adcq	$1,%r12
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


	movdqa	%xmm4,%xmm0
	movdqa	%xmm8,%xmm4
	movdqa	%xmm12,%xmm8
	movdqa	%xmm1,%xmm12
	movdqa	%xmm5,%xmm1
	movdqa	%xmm9,%xmm5
	movdqa	%xmm13,%xmm9
	jmp	.Lseal_sse_128_tail_xor

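// Final 1-15 plaintext bytes: gather them into %xmm15, XOR with the
// keystream in %xmm0 and write the ciphertext out byte by byte. The
// partial ciphertext block stays in %xmm15 so it can be merged with any
// extra_in data (pointer/length at offsets 48/56 of the parameter block
// whose address is saved at 288+32(%rsp)) before being folded into the
// Poly1305 state.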
.Lseal_sse_tail_16:
	testq	%rbx,%rbx
	jz	.Lprocess_blocks_of_extra_in

	movq	%rbx,%r8
	movq	%rbx,%rcx
	leaq	-1(%rsi,%rbx,1),%rsi
	pxor	%xmm15,%xmm15
.Lseal_sse_tail_16_compose:
	pslldq	$1,%xmm15
	pinsrb	$0,(%rsi),%xmm15
	leaq	-1(%rsi),%rsi
	decq	%rcx
	jne	.Lseal_sse_tail_16_compose


	pxor	%xmm0,%xmm15


	movq	%rbx,%rcx
	movdqu	%xmm15,%xmm0
.Lseal_sse_tail_16_extract:
	pextrb	$0,%xmm0,(%rdi)
	psrldq	$1,%xmm0
	addq	$1,%rdi
	subq	$1,%rcx
	jnz	.Lseal_sse_tail_16_extract








	movq	288 + 0 + 32(%rsp),%r9
	movq	56(%r9),%r14
	movq	48(%r9),%r13
	testq	%r14,%r14
	jz	.Lprocess_partial_block

	movq	$16,%r15
	subq	%rbx,%r15
	cmpq	%r15,%r14

	jge	.Lload_extra_in
	movq	%r14,%r15

.Lload_extra_in:


	leaq	-1(%r13,%r15,1),%rsi


	addq	%r15,%r13
	subq	%r15,%r14
	movq	%r13,48(%r9)
	movq	%r14,56(%r9)



	addq	%r15,%r8


	pxor	%xmm11,%xmm11
.Lload_extra_load_loop:
	pslldq	$1,%xmm11
	pinsrb	$0,(%rsi),%xmm11
	leaq	-1(%rsi),%rsi
	subq	$1,%r15
	jnz	.Lload_extra_load_loop




	movq	%rbx,%r15

.Lload_extra_shift_loop:
	pslldq	$1,%xmm11
	subq	$1,%r15
	jnz	.Lload_extra_shift_loop




	leaq	.Land_masks(%rip),%r15
	shlq	$4,%rbx
	pand	-16(%r15,%rbx,1),%xmm15


	por	%xmm11,%xmm15



.byte	102,77,15,126,253
	pextrq	$1,%xmm15,%r14
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


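// Poly1305-hash the remaining extra_in data: whole 16-byte blocks in the
// loop below, then any trailing bytes as a masked partial block.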
.Lprocess_blocks_of_extra_in:

	movq	288+32+0 (%rsp),%r9
	movq	48(%r9),%rsi
	movq	56(%r9),%r8
	movq	%r8,%rcx
	shrq	$4,%r8

.Lprocess_extra_hash_loop:
	jz	.Lprocess_extra_in_trailer
	addq	0+0(%rsi),%r10
	adcq	8+0(%rsi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rsi),%rsi
	subq	$1,%r8
	jmp	.Lprocess_extra_hash_loop
.Lprocess_extra_in_trailer:
	andq	$15,%rcx
	movq	%rcx,%rbx
	jz	.Ldo_length_block
	leaq	-1(%rsi,%rcx,1),%rsi

.Lprocess_extra_in_trailer_load:
	pslldq	$1,%xmm15
	pinsrb	$0,(%rsi),%xmm15
	leaq	-1(%rsi),%rsi
	subq	$1,%rcx
	jnz	.Lprocess_extra_in_trailer_load

.Lprocess_partial_block:

	leaq	.Land_masks(%rip),%r15
	shlq	$4,%rbx
	pand	-16(%r15,%rbx,1),%xmm15
.byte	102,77,15,126,253
	pextrq	$1,%xmm15,%r14
	addq	%r13,%r10
	adcq	%r14,%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


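// Hash the block holding the AD and ciphertext lengths (saved at 32(%rbp)),
// do the final reduction modulo 2^130-5, add the "s" half of the key from
// 16(%rbp) and store the 16-byte tag through the pointer popped off the
// stack.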
.Ldo_length_block:
	addq	0+0+32(%rbp),%r10
	adcq	8+0+32(%rbp),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


	movq	%r10,%r13
	movq	%r11,%r14
	movq	%r12,%r15
	subq	$-5,%r10
	sbbq	$-1,%r11
	sbbq	$3,%r12
	cmovcq	%r13,%r10
	cmovcq	%r14,%r11
	cmovcq	%r15,%r12

	addq	0+0+16(%rbp),%r10
	adcq	8+0+16(%rbp),%r11

.cfi_remember_state	
	addq	$288 + 0 + 32,%rsp
.cfi_adjust_cfa_offset	-(288 + 32)

	popq	%r9
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r9
	movq	%r10,(%r9)
	movq	%r11,8(%r9)
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	ret

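// Short seal path (at most 128 bytes of plaintext): three ChaCha20 blocks
// straight from the key/nonce at (%r9). The first 32 bytes of the
// counter-0 block become the Poly1305 key (r clamped via .Lclamp, s as-is),
// stored at 0/16(%rbp); the AD is hashed and the common tail code encrypts
// the data.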
.Lseal_sse_128:
.cfi_restore_state	
	movdqu	.Lchacha20_consts(%rip),%xmm0
	movdqa	%xmm0,%xmm1
	movdqa	%xmm0,%xmm2
	movdqu	0(%r9),%xmm4
	movdqa	%xmm4,%xmm5
	movdqa	%xmm4,%xmm6
	movdqu	16(%r9),%xmm8
	movdqa	%xmm8,%xmm9
	movdqa	%xmm8,%xmm10
	movdqu	32(%r9),%xmm14
	movdqa	%xmm14,%xmm12
	paddd	.Lsse_inc(%rip),%xmm12
	movdqa	%xmm12,%xmm13
	paddd	.Lsse_inc(%rip),%xmm13
	movdqa	%xmm4,%xmm7
	movdqa	%xmm8,%xmm11
	movdqa	%xmm12,%xmm15
	movq	$10,%r10

.Lseal_sse_128_rounds:
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,4
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,12
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,4
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,12
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,4
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,12
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol16(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm4
	pxor	%xmm3,%xmm4
	paddd	%xmm4,%xmm0
	pxor	%xmm0,%xmm12
	pshufb	.Lrol8(%rip),%xmm12
	paddd	%xmm12,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,15,228,12
.byte	102,69,15,58,15,192,8
.byte	102,69,15,58,15,228,4
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol16(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm5
	pxor	%xmm3,%xmm5
	paddd	%xmm5,%xmm1
	pxor	%xmm1,%xmm13
	pshufb	.Lrol8(%rip),%xmm13
	paddd	%xmm13,%xmm9
	pxor	%xmm9,%xmm5
	movdqa	%xmm5,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm5
	pxor	%xmm3,%xmm5
.byte	102,15,58,15,237,12
.byte	102,69,15,58,15,201,8
.byte	102,69,15,58,15,237,4
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol16(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$12,%xmm3
	psrld	$20,%xmm6
	pxor	%xmm3,%xmm6
	paddd	%xmm6,%xmm2
	pxor	%xmm2,%xmm14
	pshufb	.Lrol8(%rip),%xmm14
	paddd	%xmm14,%xmm10
	pxor	%xmm10,%xmm6
	movdqa	%xmm6,%xmm3
	pslld	$7,%xmm3
	psrld	$25,%xmm6
	pxor	%xmm3,%xmm6
.byte	102,15,58,15,246,12
.byte	102,69,15,58,15,210,8
.byte	102,69,15,58,15,246,4

	decq	%r10
	jnz	.Lseal_sse_128_rounds
	paddd	.Lchacha20_consts(%rip),%xmm0
	paddd	.Lchacha20_consts(%rip),%xmm1
	paddd	.Lchacha20_consts(%rip),%xmm2
	paddd	%xmm7,%xmm4
	paddd	%xmm7,%xmm5
	paddd	%xmm7,%xmm6
	paddd	%xmm11,%xmm8
	paddd	%xmm11,%xmm9
	paddd	%xmm15,%xmm12
	paddd	.Lsse_inc(%rip),%xmm15
	paddd	%xmm15,%xmm13

	pand	.Lclamp(%rip),%xmm2
	movdqa	%xmm2,0+0(%rbp)
	movdqa	%xmm6,0+16(%rbp)

	movq	%r8,%r8
	call	poly_hash_ad_internal
	jmp	.Lseal_sse_128_tail_xor
.size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
.cfi_endproc	


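// AVX2 variant of the open (decrypt) path. The prologue below consists of
// CFI directives only: the pushes and stack allocation have already been
// performed before control reaches this label.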
.type	chacha20_poly1305_open_avx2,@function
.align	64
chacha20_poly1305_open_avx2:
.cfi_startproc	


.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.cfi_adjust_cfa_offset	8
.cfi_offset	%r9,-64
.cfi_adjust_cfa_offset	288 + 32

	vzeroupper
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vbroadcasti128	0(%r9),%ymm4
	vbroadcasti128	16(%r9),%ymm8
	vbroadcasti128	32(%r9),%ymm12
	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
	cmpq	$192,%rbx
	jbe	.Lopen_avx2_192
	cmpq	$320,%rbx
	jbe	.Lopen_avx2_320

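// General AVX2 open: run one ChaCha20 state to derive the Poly1305 key
// (the low 32 bytes, with r clamped, are stored at 0(%rbp)), hash the AD,
// then hash and decrypt the first 64 bytes before entering the 512-byte
// main loop.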
	vmovdqa	%ymm4,0+64(%rbp)
	vmovdqa	%ymm8,0+96(%rbp)
	vmovdqa	%ymm12,0+160(%rbp)
	movq	$10,%r10
.Lopen_avx2_init_rounds:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4

	decq	%r10
	jne	.Lopen_avx2_init_rounds
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12

	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.Lclamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0+0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4

	movq	%r8,%r8
	call	poly_hash_ad_internal

	xorq	%rcx,%rcx
.Lopen_avx2_init_hash:
	addq	0+0(%rsi,%rcx,1),%r10
	adcq	8+0(%rsi,%rcx,1),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	addq	$16,%rcx
	cmpq	$64,%rcx
	jne	.Lopen_avx2_init_hash

	vpxor	0(%rsi),%ymm0,%ymm0
	vpxor	32(%rsi),%ymm4,%ymm4

	vmovdqu	%ymm0,0(%rdi)
	vmovdqu	%ymm4,32(%rdi)
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi
	subq	$64,%rbx
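// Main AVX2 open loop: each iteration decrypts 512 bytes using eight
// ChaCha20 blocks held in four ymm register groups, with mulx-based
// Poly1305 hashing of the ciphertext interleaved into the rounds.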
.Lopen_avx2_main_loop:

	cmpq	$512,%rbx
	jb	.Lopen_avx2_main_loop_done
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,0+256(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm12,0+160(%rbp)

	xorq	%rcx,%rcx
.Lopen_avx2_main_loop_rounds:
	addq	0+0(%rsi,%rcx,1),%r10
	adcq	8+0(%rsi,%rcx,1),%r11
	adcq	$1,%r12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	addq	0+16(%rsi,%rcx,1),%r10
	adcq	8+16(%rsi,%rcx,1),%r11
	adcq	$1,%r12
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	addq	0+32(%rsi,%rcx,1),%r10
	adcq	8+32(%rsi,%rcx,1),%r11
	adcq	$1,%r12

	leaq	48(%rcx),%rcx
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	cmpq	$60*8,%rcx
	jne	.Lopen_avx2_main_loop_rounds
	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	0+64(%rbp),%ymm7,%ymm7
	vpaddd	0+96(%rbp),%ymm11,%ymm11
	vpaddd	0+256(%rbp),%ymm15,%ymm15
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12

	vmovdqa	%ymm0,0+128(%rbp)
	addq	0+60*8(%rsi),%r10
	adcq	8+60*8(%rsi),%r11
	adcq	$1,%r12
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	0+128(%rbp),%ymm0
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	addq	0+60*8+16(%rsi),%r10
	adcq	8+60*8+16(%rsi),%r11
	adcq	$1,%r12
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
	vpxor	0+384(%rsi),%ymm3,%ymm3
	vpxor	32+384(%rsi),%ymm0,%ymm0
	vpxor	64+384(%rsi),%ymm4,%ymm4
	vpxor	96+384(%rsi),%ymm8,%ymm8
	vmovdqu	%ymm3,0+384(%rdi)
	vmovdqu	%ymm0,32+384(%rdi)
	vmovdqu	%ymm4,64+384(%rdi)
	vmovdqu	%ymm8,96+384(%rdi)

	leaq	512(%rsi),%rsi
	leaq	512(%rdi),%rdi
	subq	$512,%rbx
	jmp	.Lopen_avx2_main_loop
.Lopen_avx2_main_loop_done:
	testq	%rbx,%rbx
	vzeroupper
	je	.Lopen_sse_finalize

	cmpq	$384,%rbx
	ja	.Lopen_avx2_tail_512
	cmpq	$256,%rbx
	ja	.Lopen_avx2_tail_384
	cmpq	$128,%rbx
	ja	.Lopen_avx2_tail_256
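// Open tail, at most 128 bytes left: one two-block ymm state; whole
// 16-byte ciphertext blocks are hashed while the rounds run.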
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)

	xorq	%r8,%r8
	movq	%rbx,%rcx
	andq	$-16,%rcx
	testq	%rcx,%rcx
	je	.Lopen_avx2_tail_128_rounds
.Lopen_avx2_tail_128_rounds_and_x1hash:
	addq	0+0(%rsi,%r8,1),%r10
	adcq	8+0(%rsi,%r8,1),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

.Lopen_avx2_tail_128_rounds:
	addq	$16,%r8
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4

	cmpq	%rcx,%r8
	jb	.Lopen_avx2_tail_128_rounds_and_x1hash
	cmpq	$160,%r8
	jne	.Lopen_avx2_tail_128_rounds
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	jmp	.Lopen_avx2_tail_128_xor

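// Open tail, 129-256 bytes left: two two-block ymm states; as many whole
// ciphertext blocks as fit are hashed during the rounds, the remainder in
// the hash loop that follows.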
.Lopen_avx2_tail_256:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)

	movq	%rbx,0+128(%rbp)
	movq	%rbx,%rcx
	subq	$128,%rcx
	shrq	$4,%rcx
	movq	$10,%r8
	cmpq	$10,%rcx
	cmovgq	%r8,%rcx
	movq	%rsi,%rbx
	xorq	%r8,%r8
.Lopen_avx2_tail_256_rounds_and_x1hash:
	addq	0+0(%rbx),%r10
	adcq	8+0(%rbx),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rbx),%rbx
.Lopen_avx2_tail_256_rounds:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5

	incq	%r8
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	cmpq	%rcx,%r8
	jb	.Lopen_avx2_tail_256_rounds_and_x1hash
	cmpq	$10,%r8
	jne	.Lopen_avx2_tail_256_rounds
	movq	%rbx,%r8
	subq	%rsi,%rbx
	movq	%rbx,%rcx
	movq	0+128(%rbp),%rbx
.Lopen_avx2_tail_256_hash:
	addq	$16,%rcx
	cmpq	%rbx,%rcx
	jg	.Lopen_avx2_tail_256_done
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
	jmp	.Lopen_avx2_tail_256_hash
.Lopen_avx2_tail_256_done:
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm1,%ymm1
	vpxor	64+0(%rsi),%ymm5,%ymm5
	vpxor	96+0(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm1,32+0(%rdi)
	vmovdqu	%ymm5,64+0(%rdi)
	vmovdqu	%ymm9,96+0(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	leaq	128(%rsi),%rsi
	leaq	128(%rdi),%rdi
	subq	$128,%rbx
	jmp	.Lopen_avx2_tail_128_xor

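// Open tail, 257-384 bytes left: three two-block ymm states.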
.Lopen_avx2_tail_384:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)

	movq	%rbx,0+128(%rbp)
	movq	%rbx,%rcx
	subq	$256,%rcx
	shrq	$4,%rcx
	addq	$6,%rcx
	movq	$10,%r8
	cmpq	$10,%rcx
	cmovgq	%r8,%rcx
	movq	%rsi,%rbx
	xorq	%r8,%r8
.Lopen_avx2_tail_384_rounds_and_x2hash:
	addq	0+0(%rbx),%r10
	adcq	8+0(%rbx),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rbx),%rbx
.Lopen_avx2_tail_384_rounds_and_x1hash:
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	addq	0+0(%rbx),%r10
	adcq	8+0(%rbx),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rbx),%rbx
	incq	%r8
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4

	cmpq	%rcx,%r8
	jb	.Lopen_avx2_tail_384_rounds_and_x2hash
	cmpq	$10,%r8
	jne	.Lopen_avx2_tail_384_rounds_and_x1hash
	movq	%rbx,%r8
	subq	%rsi,%rbx
	movq	%rbx,%rcx
	movq	0+128(%rbp),%rbx
.Lopen_avx2_384_tail_hash:
	addq	$16,%rcx
	cmpq	%rbx,%rcx
	jg	.Lopen_avx2_384_tail_done
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
	jmp	.Lopen_avx2_384_tail_hash
.Lopen_avx2_384_tail_done:
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm2,%ymm2
	vpxor	64+0(%rsi),%ymm6,%ymm6
	vpxor	96+0(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm2,32+0(%rdi)
	vmovdqu	%ymm6,64+0(%rdi)
	vmovdqu	%ymm10,96+0(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm1,%ymm1
	vpxor	64+128(%rsi),%ymm5,%ymm5
	vpxor	96+128(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm1,32+128(%rdi)
	vmovdqu	%ymm5,64+128(%rdi)
	vmovdqu	%ymm9,96+128(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	leaq	256(%rsi),%rsi
	leaq	256(%rdi),%rdi
	subq	$256,%rbx
	jmp	.Lopen_avx2_tail_128_xor

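// Open tail, 385-512 bytes left: the full four-state round function, with
// Poly1305 hashing of the ciphertext interleaved into the rounds.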
.Lopen_avx2_tail_512:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,0+256(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm12,0+160(%rbp)

	xorq	%rcx,%rcx
	movq	%rsi,%r8
.Lopen_avx2_tail_512_rounds_and_x2hash:
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
.Lopen_avx2_tail_512_rounds_and_x1hash:
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	addq	0+16(%r8),%r10
	adcq	8+16(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%r8),%r8
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	incq	%rcx
	cmpq	$4,%rcx
	jl	.Lopen_avx2_tail_512_rounds_and_x2hash
	cmpq	$10,%rcx
	jne	.Lopen_avx2_tail_512_rounds_and_x1hash
	movq	%rbx,%rcx
	subq	$384,%rcx
	andq	$-16,%rcx
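/* Absorb the remaining whole 16-byte ciphertext blocks beyond the 384 bytes
   hashed during the rounds (%rcx = remaining length minus 384, rounded down to 16). */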
.Lopen_avx2_tail_512_hash:
	testq	%rcx,%rcx
	je	.Lopen_avx2_tail_512_done
	addq	0+0(%r8),%r10
	adcq	8+0(%r8),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%r8),%r8
	subq	$16,%rcx
	jmp	.Lopen_avx2_tail_512_hash
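/* Rounds and hashing done: add the saved initial state back in to obtain the
   keystream, XOR the first 384 bytes, and stage the leftover keystream in
   %ymm0/%ymm4/%ymm8/%ymm12 for the 32-byte loop below. */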
.Lopen_avx2_tail_512_done:
	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	0+64(%rbp),%ymm7,%ymm7
	vpaddd	0+96(%rbp),%ymm11,%ymm11
	vpaddd	0+256(%rbp),%ymm15,%ymm15
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12

	vmovdqa	%ymm0,0+128(%rbp)
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	0+128(%rbp),%ymm0
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	leaq	384(%rsi),%rsi
	leaq	384(%rdi),%rdi
	subq	$384,%rbx
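/* Drain the staged keystream 32 bytes at a time, rotating the next chunk into %ymm0. */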
.Lopen_avx2_tail_128_xor:
	cmpq	$32,%rbx
	jb	.Lopen_avx2_tail_32_xor
	subq	$32,%rbx
	vpxor	(%rsi),%ymm0,%ymm0
	vmovdqu	%ymm0,(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	vmovdqa	%ymm4,%ymm0
	vmovdqa	%ymm8,%ymm4
	vmovdqa	%ymm12,%ymm8
	jmp	.Lopen_avx2_tail_128_xor
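/* Handle one final 16-byte chunk if present; the last 0-15 bytes are left to
   .Lopen_sse_tail_16 with their keystream saved in %xmm1. */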
.Lopen_avx2_tail_32_xor:
	cmpq	$16,%rbx
	vmovdqa	%xmm0,%xmm1
	jb	.Lopen_avx2_exit
	subq	$16,%rbx

	vpxor	(%rsi),%xmm0,%xmm1
	vmovdqu	%xmm1,(%rdi)
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
	vmovdqa	%xmm0,%xmm1
.Lopen_avx2_exit:
	vzeroupper
	jmp	.Lopen_sse_tail_16

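/* Open path for inputs of at most 192 bytes: two 2-block lanes. Block 0 supplies
   the clamped Poly1305 key; blocks 1-3 give up to 192 bytes of keystream. */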
.Lopen_avx2_192:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
	vmovdqa	%ymm12,%ymm11
	vmovdqa	%ymm13,%ymm15
	movq	$10,%r10
.Lopen_avx2_192_rounds:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5

	decq	%r10
	jne	.Lopen_avx2_192_rounds
	vpaddd	%ymm2,%ymm0,%ymm0
	vpaddd	%ymm2,%ymm1,%ymm1
	vpaddd	%ymm6,%ymm4,%ymm4
	vpaddd	%ymm6,%ymm5,%ymm5
	vpaddd	%ymm10,%ymm8,%ymm8
	vpaddd	%ymm10,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm13,%ymm13
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.Lclamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0+0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
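/* Short open loop: hash the AAD (the self-move of %r8 below is a no-op left by
   the code generator, the AAD length is already there), then per 32-byte step
   absorb two ciphertext blocks into Poly1305 and XOR with keystream rotated
   through %ymm0. */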
.Lopen_avx2_short:
	movq	%r8,%r8
	call	poly_hash_ad_internal
.Lopen_avx2_short_hash_and_xor_loop:
	cmpq	$32,%rbx
	jb	.Lopen_avx2_short_tail_32
	subq	$32,%rbx
	addq	0+0(%rsi),%r10
	adcq	8+0(%rsi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	0+16(%rsi),%r10
	adcq	8+16(%rsi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12


	vpxor	(%rsi),%ymm0,%ymm0
	vmovdqu	%ymm0,(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi

	vmovdqa	%ymm4,%ymm0
	vmovdqa	%ymm8,%ymm4
	vmovdqa	%ymm12,%ymm8
	vmovdqa	%ymm1,%ymm12
	vmovdqa	%ymm5,%ymm1
	vmovdqa	%ymm9,%ymm5
	vmovdqa	%ymm13,%ymm9
	vmovdqa	%ymm2,%ymm13
	vmovdqa	%ymm6,%ymm2
	jmp	.Lopen_avx2_short_hash_and_xor_loop
.Lopen_avx2_short_tail_32:
	cmpq	$16,%rbx
	vmovdqa	%xmm0,%xmm1
	jb	.Lopen_avx2_short_tail_32_exit
	subq	$16,%rbx
	addq	0+0(%rsi),%r10
	adcq	8+0(%rsi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	vpxor	(%rsi),%xmm0,%xmm3
	vmovdqu	%xmm3,(%rdi)
	leaq	16(%rsi),%rsi
	leaq	16(%rdi),%rdi
	vextracti128	$1,%ymm0,%xmm1
.Lopen_avx2_short_tail_32_exit:
	vzeroupper
	jmp	.Lopen_sse_tail_16

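/* Open path for 193..320-byte inputs: three 2-block lanes; as above, block 0
   becomes the Poly1305 key. */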
.Lopen_avx2_320:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	movq	$10,%r10
.Lopen_avx2_320_rounds:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	decq	%r10
	jne	.Lopen_avx2_320_rounds
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	%ymm7,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm5,%ymm5
	vpaddd	%ymm7,%ymm6,%ymm6
	vpaddd	%ymm11,%ymm8,%ymm8
	vpaddd	%ymm11,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm10,%ymm10
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.Lclamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0+0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
	jmp	.Lopen_avx2_short
.size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
.cfi_endproc	


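/* chacha20_poly1305_seal_avx2: AVX2 sealing path, entered with the caller's
   stack frame already in place (hence the bare CFI directives below).
   %rdi/%rsi are the ciphertext/plaintext pointers, %rbx holds the plaintext
   length, %r9 the key/counter/nonce block. */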
.type	chacha20_poly1305_seal_avx2,@function
.align	64
chacha20_poly1305_seal_avx2:
.cfi_startproc	


.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-16
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-24
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.cfi_adjust_cfa_offset	8
.cfi_offset	%r9,-64
.cfi_adjust_cfa_offset	288 + 32

	vzeroupper
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vbroadcasti128	0(%r9),%ymm4
	vbroadcasti128	16(%r9),%ymm8
	vbroadcasti128	32(%r9),%ymm12
	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
	cmpq	$192,%rbx
	jbe	.Lseal_avx2_192
	cmpq	$320,%rbx
	jbe	.Lseal_avx2_320
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm4,0+64(%rbp)
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm8,0+96(%rbp)
	vmovdqa	%ymm12,%ymm15
	vpaddd	.Lavx2_inc(%rip),%ymm15,%ymm14
	vpaddd	.Lavx2_inc(%rip),%ymm14,%ymm13
	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	vmovdqa	%ymm15,0+256(%rbp)
	movq	$10,%r10
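/* First keystream batch: four 2-block lanes (blocks 0-7), ten double rounds.
   No ciphertext exists yet, so no Poly1305 work is interleaved here. */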
.Lseal_avx2_init_rounds:
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	decq	%r10
	jnz	.Lseal_avx2_init_rounds
	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	0+64(%rbp),%ymm7,%ymm7
	vpaddd	0+96(%rbp),%ymm11,%ymm11
	vpaddd	0+256(%rbp),%ymm15,%ymm15
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12

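/* Batch 0 finished: clamp the first 32 bytes of block 0 into the Poly1305 key,
   hash the AAD, then encrypt the first 320 bytes with blocks 1-5. */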
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
	vpand	.Lclamp(%rip),%ymm15,%ymm15
	vmovdqa	%ymm15,0+0(%rbp)
	movq	%r8,%r8
	call	poly_hash_ad_internal

	vpxor	0(%rsi),%ymm3,%ymm3
	vpxor	32(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm3,0(%rdi)
	vmovdqu	%ymm11,32(%rdi)
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+64(%rsi),%ymm15,%ymm15
	vpxor	32+64(%rsi),%ymm2,%ymm2
	vpxor	64+64(%rsi),%ymm6,%ymm6
	vpxor	96+64(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm15,0+64(%rdi)
	vmovdqu	%ymm2,32+64(%rdi)
	vmovdqu	%ymm6,64+64(%rdi)
	vmovdqu	%ymm10,96+64(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+192(%rsi),%ymm15,%ymm15
	vpxor	32+192(%rsi),%ymm1,%ymm1
	vpxor	64+192(%rsi),%ymm5,%ymm5
	vpxor	96+192(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm15,0+192(%rdi)
	vmovdqu	%ymm1,32+192(%rdi)
	vmovdqu	%ymm5,64+192(%rdi)
	vmovdqu	%ymm9,96+192(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm15,%ymm8

	leaq	320(%rsi),%rsi
	subq	$320,%rbx
	movq	$320,%rcx
	cmpq	$128,%rbx
	jbe	.Lseal_avx2_short_hash_remainder
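/* More than 128 bytes beyond the first 320: blocks 6-7 encrypt another 128
   bytes, then either a tail path finishes up or the 512-byte main loop takes over. */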
	vpxor	0(%rsi),%ymm0,%ymm0
	vpxor	32(%rsi),%ymm4,%ymm4
	vpxor	64(%rsi),%ymm8,%ymm8
	vpxor	96(%rsi),%ymm12,%ymm12
	vmovdqu	%ymm0,320(%rdi)
	vmovdqu	%ymm4,352(%rdi)
	vmovdqu	%ymm8,384(%rdi)
	vmovdqu	%ymm12,416(%rdi)
	leaq	128(%rsi),%rsi
	subq	$128,%rbx
	movq	$8,%rcx
	movq	$2,%r8
	cmpq	$128,%rbx
	jbe	.Lseal_avx2_tail_128
	cmpq	$256,%rbx
	jbe	.Lseal_avx2_tail_256
	cmpq	$384,%rbx
	jbe	.Lseal_avx2_tail_384
	cmpq	$512,%rbx
	jbe	.Lseal_avx2_tail_512
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,0+256(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15

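/* First trip into the loop body: only 448 bytes of ciphertext exist so far, so
   enter partway through the round schedule with %rcx = 9 and %rdi stepped back
   16 bytes; the interleaved hashing then covers exactly those 448 bytes. */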
	subq	$16,%rdi
	movq	$9,%rcx
	jmp	.Lseal_avx2_main_loop_rounds_entry
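/* Steady-state seal loop: each pass runs ten double rounds to produce the next
   512 bytes of keystream while Poly1305 absorbs the 512 bytes of ciphertext
   written by the previous pass (read through %rdi, which thereby advances to
   where the new batch is written). */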
.align	32
.Lseal_avx2_main_loop:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,0+256(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm12,0+160(%rbp)

	movq	$10,%rcx
.align	32
.Lseal_avx2_main_loop_rounds:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

.Lseal_avx2_main_loop_rounds_entry:
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	addq	0+32(%rdi),%r10
	adcq	8+32(%rdi),%r11
	adcq	$1,%r12

	leaq	48(%rdi),%rdi
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpalignr	$4,%ymm12,%ymm12,%ymm12

	decq	%rcx
	jne	.Lseal_avx2_main_loop_rounds
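/* Rounds done: add the saved state rows and counters back in, absorb the last
   two blocks of the previous batch, then XOR and write this batch's 512 bytes. */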
	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	0+64(%rbp),%ymm7,%ymm7
	vpaddd	0+96(%rbp),%ymm11,%ymm11
	vpaddd	0+256(%rbp),%ymm15,%ymm15
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12

	vmovdqa	%ymm0,0+128(%rbp)
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	0+128(%rbp),%ymm0
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
	vpxor	0+384(%rsi),%ymm3,%ymm3
	vpxor	32+384(%rsi),%ymm0,%ymm0
	vpxor	64+384(%rsi),%ymm4,%ymm4
	vpxor	96+384(%rsi),%ymm8,%ymm8
	vmovdqu	%ymm3,0+384(%rdi)
	vmovdqu	%ymm0,32+384(%rdi)
	vmovdqu	%ymm4,64+384(%rdi)
	vmovdqu	%ymm8,96+384(%rdi)

	leaq	512(%rsi),%rsi
	subq	$512,%rbx
	cmpq	$512,%rbx
	jg	.Lseal_avx2_main_loop

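/* No further full batch: the 512 bytes just written are still unhashed. Absorb
   their first 32 bytes here, leave the remaining 480 to the tail rounds
   (%rcx = 10, %r8 = 0), and dispatch on the remaining length. */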
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	movq	$10,%rcx
	xorq	%r8,%r8

	cmpq	$384,%rbx
	ja	.Lseal_avx2_tail_512
	cmpq	$256,%rbx
	ja	.Lseal_avx2_tail_384
	cmpq	$128,%rbx
	ja	.Lseal_avx2_tail_256

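/* Seal tails for the final 1..512 bytes: generate only as many keystream lanes
   as needed while the interleaved Poly1305 catches up on ciphertext already
   written; whatever is not encrypted here is staged in %ymm0/%ymm4/%ymm8/%ymm12
   for the short seal loop.
   At most 128 bytes left: one lane. Each %rcx pass hashes three 16-byte blocks
   (one here, two inside the double round), each extra %r8 pass two, giving ten
   double rounds in total. */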
.Lseal_avx2_tail_128:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)

.Lseal_avx2_tail_128_rounds_and_3xhash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
.Lseal_avx2_tail_128_rounds_and_2xhash:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_avx2_tail_128_rounds_and_3xhash
	decq	%r8
	jge	.Lseal_avx2_tail_128_rounds_and_2xhash
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	jmp	.Lseal_avx2_short_loop

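/* 129..256 bytes left: two lanes; 128 bytes are encrypted here and the rest is
   staged, with %rcx set to the ciphertext written here that still needs hashing. */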
.Lseal_avx2_tail_256:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)

.Lseal_avx2_tail_256_rounds_and_3xhash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
.Lseal_avx2_tail_256_rounds_and_2xhash:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_avx2_tail_256_rounds_and_3xhash
	decq	%r8
	jge	.Lseal_avx2_tail_256_rounds_and_2xhash
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm1,%ymm1
	vpxor	64+0(%rsi),%ymm5,%ymm5
	vpxor	96+0(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm1,32+0(%rdi)
	vmovdqu	%ymm5,64+0(%rdi)
	vmovdqu	%ymm9,96+0(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$128,%rcx
	leaq	128(%rsi),%rsi
	subq	$128,%rbx
	jmp	.Lseal_avx2_short_hash_remainder

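/* 257..384 bytes left: three lanes; 256 bytes encrypted here, the rest staged. */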
.Lseal_avx2_tail_384:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)

.Lseal_avx2_tail_384_rounds_and_3xhash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
.Lseal_avx2_tail_384_rounds_and_2xhash:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_avx2_tail_384_rounds_and_3xhash
	decq	%r8
	jge	.Lseal_avx2_tail_384_rounds_and_2xhash
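// 384-byte tail rounds done: re-add the saved initial state (key rows at
// 64/96(%rbp), counters at 160..224(%rbp)), regroup 128-bit lanes with
// vperm2i128, and xor/store the first 256 bytes.  The last block's key stream
// stays in registers; %rcx is set to 256, the number of already-written
// ciphertext bytes that still have to be hashed by the short-path loop.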
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+0(%rsi),%ymm3,%ymm3
	vpxor	32+0(%rsi),%ymm2,%ymm2
	vpxor	64+0(%rsi),%ymm6,%ymm6
	vpxor	96+0(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+0(%rdi)
	vmovdqu	%ymm2,32+0(%rdi)
	vmovdqu	%ymm6,64+0(%rdi)
	vmovdqu	%ymm10,96+0(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm1,%ymm1
	vpxor	64+128(%rsi),%ymm5,%ymm5
	vpxor	96+128(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm1,32+128(%rdi)
	vmovdqu	%ymm5,64+128(%rdi)
	vmovdqu	%ymm9,96+128(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$256,%rcx
	leaq	256(%rsi),%rsi
	subq	$256,%rbx
	jmp	.Lseal_avx2_short_hash_remainder
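// Tail of up to 512 bytes: build four parallel ChaCha20 states (ymm0/4/8/12,
// ymm1/5/9/13, ymm2/6/10/14, ymm3/7/11/15), spacing the block counters by
// .Lavx2_inc and stashing the counter rows at 160..256(%rbp) so they can be
// re-added after the rounds.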

.Lseal_avx2_tail_512:
	vmovdqa	.Lchacha20_consts(%rip),%ymm0
	vmovdqa	0+64(%rbp),%ymm4
	vmovdqa	0+96(%rbp),%ymm8
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm10
	vmovdqa	%ymm0,%ymm3
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	.Lavx2_inc(%rip),%ymm12
	vpaddd	0+160(%rbp),%ymm12,%ymm15
	vpaddd	%ymm15,%ymm12,%ymm14
	vpaddd	%ymm14,%ymm12,%ymm13
	vpaddd	%ymm13,%ymm12,%ymm12
	vmovdqa	%ymm15,0+256(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm12,0+160(%rbp)
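// Round loop structure: %rcx counts double rounds that also absorb three
// 16-byte Poly1305 blocks (one in _3xhash plus two in _2xhash); once %rcx is
// exhausted, %r8 counts the remaining double rounds that absorb only two.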

.Lseal_avx2_tail_512_rounds_and_3xhash:
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	addq	%rax,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
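// One ChaCha20 double round across all four blocks, with ymm8 parked in the
// scratch slot at 128(%rbp) so it can double as the rotate temporary and the
// rol8/rol16 shuffle constant.  The interleaved Poly1305 steps in this body
// use mulx (BMI2) with r kept in %rdx, rather than the mulq form used elsewhere.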
.Lseal_avx2_tail_512_rounds_and_2xhash:
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$4,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$12,%ymm15,%ymm15,%ymm15
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	addq	%rax,%r15
	adcq	%rdx,%r9
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vmovdqa	%ymm8,0+128(%rbp)
	vmovdqa	.Lrol16(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$20,%ymm7,%ymm8
	vpslld	$32-20,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$20,%ymm6,%ymm8
	vpslld	$32-20,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$20,%ymm5,%ymm8
	vpslld	$32-20,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$20,%ymm4,%ymm8
	vpslld	$32-20,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	.Lrol8(%rip),%ymm8
	vpaddd	%ymm7,%ymm3,%ymm3
	vpaddd	%ymm6,%ymm2,%ymm2
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	vpaddd	%ymm5,%ymm1,%ymm1
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm3,%ymm15,%ymm15
	vpxor	%ymm2,%ymm14,%ymm14
	vpxor	%ymm1,%ymm13,%ymm13
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	%ymm8,%ymm15,%ymm15
	vpshufb	%ymm8,%ymm14,%ymm14
	vpshufb	%ymm8,%ymm13,%ymm13
	vpshufb	%ymm8,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm11,%ymm11
	vpaddd	%ymm14,%ymm10,%ymm10
	vpaddd	%ymm13,%ymm9,%ymm9
	vpaddd	0+128(%rbp),%ymm12,%ymm8
	vpxor	%ymm11,%ymm7,%ymm7
	vpxor	%ymm10,%ymm6,%ymm6
	vpxor	%ymm9,%ymm5,%ymm5
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	%ymm8,0+128(%rbp)
	vpsrld	$25,%ymm7,%ymm8
	movq	0+0+0(%rbp),%rdx
	movq	%rdx,%r15
	mulxq	%r10,%r13,%r14
	mulxq	%r11,%rax,%rdx
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	vpslld	$32-25,%ymm7,%ymm7
	vpxor	%ymm8,%ymm7,%ymm7
	vpsrld	$25,%ymm6,%ymm8
	vpslld	$32-25,%ymm6,%ymm6
	vpxor	%ymm8,%ymm6,%ymm6
	vpsrld	$25,%ymm5,%ymm8
	vpslld	$32-25,%ymm5,%ymm5
	vpxor	%ymm8,%ymm5,%ymm5
	vpsrld	$25,%ymm4,%ymm8
	vpslld	$32-25,%ymm4,%ymm4
	vpxor	%ymm8,%ymm4,%ymm4
	vmovdqa	0+128(%rbp),%ymm8
	vpalignr	$12,%ymm7,%ymm7,%ymm7
	vpalignr	$8,%ymm11,%ymm11,%ymm11
	vpalignr	$4,%ymm15,%ymm15,%ymm15
	vpalignr	$12,%ymm6,%ymm6,%ymm6
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	movq	8+0+0(%rbp),%rdx
	mulxq	%r10,%r10,%rax
	addq	%r10,%r14
	mulxq	%r11,%r11,%r9
	adcq	%r11,%r15
	adcq	$0,%r9
	imulq	%r12,%rdx
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm12,%ymm12,%ymm12
















	addq	%rax,%r15
	adcq	%rdx,%r9




















	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi
	decq	%rcx
	jg	.Lseal_avx2_tail_512_rounds_and_3xhash
	decq	%r8
	jge	.Lseal_avx2_tail_512_rounds_and_2xhash
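// 512-byte tail rounds done: re-add the initial state to all four blocks,
// regroup lanes, xor/store the first 384 bytes, and leave the last block's
// key stream in registers; %rcx = 384 ciphertext bytes remain to be hashed by
// the short-path remainder loop.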
	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
	vpaddd	0+64(%rbp),%ymm7,%ymm7
	vpaddd	0+96(%rbp),%ymm11,%ymm11
	vpaddd	0+256(%rbp),%ymm15,%ymm15
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	0+64(%rbp),%ymm6,%ymm6
	vpaddd	0+96(%rbp),%ymm10,%ymm10
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	0+64(%rbp),%ymm5,%ymm5
	vpaddd	0+96(%rbp),%ymm9,%ymm9
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	0+64(%rbp),%ymm4,%ymm4
	vpaddd	0+96(%rbp),%ymm8,%ymm8
	vpaddd	0+160(%rbp),%ymm12,%ymm12

	vmovdqa	%ymm0,0+128(%rbp)
	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
	vpxor	0+0(%rsi),%ymm0,%ymm0
	vpxor	32+0(%rsi),%ymm3,%ymm3
	vpxor	64+0(%rsi),%ymm7,%ymm7
	vpxor	96+0(%rsi),%ymm11,%ymm11
	vmovdqu	%ymm0,0+0(%rdi)
	vmovdqu	%ymm3,32+0(%rdi)
	vmovdqu	%ymm7,64+0(%rdi)
	vmovdqu	%ymm11,96+0(%rdi)

	vmovdqa	0+128(%rbp),%ymm0
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
	vpxor	0+128(%rsi),%ymm3,%ymm3
	vpxor	32+128(%rsi),%ymm2,%ymm2
	vpxor	64+128(%rsi),%ymm6,%ymm6
	vpxor	96+128(%rsi),%ymm10,%ymm10
	vmovdqu	%ymm3,0+128(%rdi)
	vmovdqu	%ymm2,32+128(%rdi)
	vmovdqu	%ymm6,64+128(%rdi)
	vmovdqu	%ymm10,96+128(%rdi)
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
	vpxor	0+256(%rsi),%ymm3,%ymm3
	vpxor	32+256(%rsi),%ymm1,%ymm1
	vpxor	64+256(%rsi),%ymm5,%ymm5
	vpxor	96+256(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm3,0+256(%rdi)
	vmovdqu	%ymm1,32+256(%rdi)
	vmovdqu	%ymm5,64+256(%rdi)
	vmovdqu	%ymm9,96+256(%rdi)
	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
	vmovdqa	%ymm3,%ymm8

	movq	$384,%rcx
	leaq	384(%rsi),%rsi
	subq	$384,%rbx
	jmp	.Lseal_avx2_short_hash_remainder
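// Seal path for what appears to be the <=320-byte case: triplicate one
// ChaCha20 state, bumping the counter with .Lavx2_inc; the first 32 bytes of
// block 0 will become the clamped Poly1305 key.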

.Lseal_avx2_320:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
	vmovdqa	%ymm4,%ymm7
	vmovdqa	%ymm8,%ymm11
	vmovdqa	%ymm12,0+160(%rbp)
	vmovdqa	%ymm13,0+192(%rbp)
	vmovdqa	%ymm14,0+224(%rbp)
	movq	$10,%r10
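// 10 iterations of the double round below give the full 20 ChaCha20 rounds
// across all three blocks.  No Poly1305 work is interleaved yet, since the
// Poly1305 key is only derived from this key stream afterwards.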
.Lseal_avx2_320_rounds:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$12,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$4,%ymm6,%ymm6,%ymm6
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpsrld	$20,%ymm6,%ymm3
	vpslld	$12,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpaddd	%ymm6,%ymm2,%ymm2
	vpxor	%ymm2,%ymm14,%ymm14
	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
	vpaddd	%ymm14,%ymm10,%ymm10
	vpxor	%ymm10,%ymm6,%ymm6
	vpslld	$7,%ymm6,%ymm3
	vpsrld	$25,%ymm6,%ymm6
	vpxor	%ymm3,%ymm6,%ymm6
	vpalignr	$4,%ymm14,%ymm14,%ymm14
	vpalignr	$8,%ymm10,%ymm10,%ymm10
	vpalignr	$12,%ymm6,%ymm6,%ymm6

	decq	%r10
	jne	.Lseal_avx2_320_rounds
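// Re-add the initial state, clamp the first 32 bytes of block 0 (r || s) with
// .Lclamp and store them at 0(%rbp) as the Poly1305 key, then shuffle the rest
// of the key stream into the register queue consumed by .Lseal_avx2_short.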
	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
	vpaddd	%ymm7,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm5,%ymm5
	vpaddd	%ymm7,%ymm6,%ymm6
	vpaddd	%ymm11,%ymm8,%ymm8
	vpaddd	%ymm11,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm10,%ymm10
	vpaddd	0+160(%rbp),%ymm12,%ymm12
	vpaddd	0+192(%rbp),%ymm13,%ymm13
	vpaddd	0+224(%rbp),%ymm14,%ymm14
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.Lclamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0+0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
	jmp	.Lseal_avx2_short
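// Seal path for the smallest inputs (at most about 192 bytes): two ChaCha20
// blocks cover both the Poly1305 key and the whole key stream.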

.Lseal_avx2_192:
	vmovdqa	%ymm0,%ymm1
	vmovdqa	%ymm0,%ymm2
	vmovdqa	%ymm4,%ymm5
	vmovdqa	%ymm4,%ymm6
	vmovdqa	%ymm8,%ymm9
	vmovdqa	%ymm8,%ymm10
	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
	vmovdqa	%ymm12,%ymm11
	vmovdqa	%ymm13,%ymm15
	movq	$10,%r10
.Lseal_avx2_192_rounds:
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$12,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$4,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$12,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$4,%ymm5,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpsrld	$20,%ymm4,%ymm3
	vpslld	$12,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpaddd	%ymm4,%ymm0,%ymm0
	vpxor	%ymm0,%ymm12,%ymm12
	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
	vpaddd	%ymm12,%ymm8,%ymm8
	vpxor	%ymm8,%ymm4,%ymm4
	vpslld	$7,%ymm4,%ymm3
	vpsrld	$25,%ymm4,%ymm4
	vpxor	%ymm3,%ymm4,%ymm4
	vpalignr	$4,%ymm12,%ymm12,%ymm12
	vpalignr	$8,%ymm8,%ymm8,%ymm8
	vpalignr	$12,%ymm4,%ymm4,%ymm4
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpsrld	$20,%ymm5,%ymm3
	vpslld	$12,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpaddd	%ymm5,%ymm1,%ymm1
	vpxor	%ymm1,%ymm13,%ymm13
	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
	vpaddd	%ymm13,%ymm9,%ymm9
	vpxor	%ymm9,%ymm5,%ymm5
	vpslld	$7,%ymm5,%ymm3
	vpsrld	$25,%ymm5,%ymm5
	vpxor	%ymm3,%ymm5,%ymm5
	vpalignr	$4,%ymm13,%ymm13,%ymm13
	vpalignr	$8,%ymm9,%ymm9,%ymm9
	vpalignr	$12,%ymm5,%ymm5,%ymm5

	decq	%r10
	jne	.Lseal_avx2_192_rounds
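// Finalize the two blocks, derive the clamped Poly1305 key from block 0 just
// as in the 320-byte path, and queue the remaining key stream for the short
// path below.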
	vpaddd	%ymm2,%ymm0,%ymm0
	vpaddd	%ymm2,%ymm1,%ymm1
	vpaddd	%ymm6,%ymm4,%ymm4
	vpaddd	%ymm6,%ymm5,%ymm5
	vpaddd	%ymm10,%ymm8,%ymm8
	vpaddd	%ymm10,%ymm9,%ymm9
	vpaddd	%ymm11,%ymm12,%ymm12
	vpaddd	%ymm15,%ymm13,%ymm13
	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3

	vpand	.Lclamp(%rip),%ymm3,%ymm3
	vmovdqa	%ymm3,0+0(%rbp)

	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
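// Common short-path epilogue: hash the AAD, then alternate between hashing any
// ciphertext already written (the _hash_remainder loop, driven by %rcx) and
// encrypting plus hashing 32 bytes at a time from the queued key-stream
// registers.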
.Lseal_avx2_short:
	movq	%r8,%r8                   // self-move is a no-op; %r8 already carries the AAD length expected by poly_hash_ad_internal
	call	poly_hash_ad_internal
	xorq	%rcx,%rcx
.Lseal_avx2_short_hash_remainder:
	cmpq	$16,%rcx
	jb	.Lseal_avx2_short_loop
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	subq	$16,%rcx
	addq	$16,%rdi
	jmp	.Lseal_avx2_short_hash_remainder
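// Encrypt 32 bytes: xor the plaintext with the key stream in ymm0, write the
// ciphertext, absorb it into Poly1305 with two 16-byte updates, then rotate
// the key-stream register queue so the next 32 bytes land in ymm0.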
.Lseal_avx2_short_loop:
	cmpq	$32,%rbx
	jb	.Lseal_avx2_short_tail
	subq	$32,%rbx

	vpxor	(%rsi),%ymm0,%ymm0
	vmovdqu	%ymm0,(%rdi)
	leaq	32(%rsi),%rsi

	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12
	addq	0+16(%rdi),%r10
	adcq	8+16(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	32(%rdi),%rdi

	vmovdqa	%ymm4,%ymm0
	vmovdqa	%ymm8,%ymm4
	vmovdqa	%ymm12,%ymm8
	vmovdqa	%ymm1,%ymm12
	vmovdqa	%ymm5,%ymm1
	vmovdqa	%ymm9,%ymm5
	vmovdqa	%ymm13,%ymm9
	vmovdqa	%ymm2,%ymm13
	vmovdqa	%ymm6,%ymm2
	jmp	.Lseal_avx2_short_loop
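// Fewer than 32 bytes left: if at least 16 remain, encrypt and hash one more
// block from the low 128-bit lane of ymm0, then shift the high lane down for
// the final partial block.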
.Lseal_avx2_short_tail:
	cmpq	$16,%rbx
	jb	.Lseal_avx2_exit
	subq	$16,%rbx
	vpxor	(%rsi),%xmm0,%xmm3
	vmovdqu	%xmm3,(%rdi)
	leaq	16(%rsi),%rsi
	addq	0+0(%rdi),%r10
	adcq	8+0(%rdi),%r11
	adcq	$1,%r12
	movq	0+0+0(%rbp),%rax
	movq	%rax,%r15
	mulq	%r10
	movq	%rax,%r13
	movq	%rdx,%r14
	movq	0+0+0(%rbp),%rax
	mulq	%r11
	imulq	%r12,%r15
	addq	%rax,%r14
	adcq	%rdx,%r15
	movq	8+0+0(%rbp),%rax
	movq	%rax,%r9
	mulq	%r10
	addq	%rax,%r14
	adcq	$0,%rdx
	movq	%rdx,%r10
	movq	8+0+0(%rbp),%rax
	mulq	%r11
	addq	%rax,%r15
	adcq	$0,%rdx
	imulq	%r12,%r9
	addq	%r10,%r15
	adcq	%rdx,%r9
	movq	%r13,%r10
	movq	%r14,%r11
	movq	%r15,%r12
	andq	$3,%r12
	movq	%r15,%r13
	andq	$-4,%r13
	movq	%r9,%r14
	shrdq	$2,%r9,%r15
	shrq	$2,%r9
	addq	%r13,%r15
	adcq	%r14,%r9
	addq	%r15,%r10
	adcq	%r9,%r11
	adcq	$0,%r12

	leaq	16(%rdi),%rdi
	vextracti128	$1,%ymm0,%xmm0
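// Leave AVX2 (vzeroupper) and jump back into the SSE seal code, which handles
// any final sub-16-byte block and, further on, the tag.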
.Lseal_avx2_exit:
	vzeroupper
	jmp	.Lseal_sse_tail_16
.cfi_endproc	
.size	chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
#endif