/*
* Copyright 2018-2025 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/
// Empty .note.GNU-stack section: tells the linker that this object does
// not need an executable stack.
.section .note.GNU-stack,"",@progbits

// Table of resolved function addresses, private to this library (hidden
// visibility). It is 16 bytes long, 8-byte aligned and starts out zeroed.
// The vaGetDisplayDRM trampoline in this file reads the 8-byte slot at
// offset 0; a zero slot means "not resolved yet".
// NOTE(review): the use of the remaining 8 bytes is not visible in this
// file - presumably reserved by the generator; confirm before resizing.
.data
.hidden _libva_drm_so_tramp_table
.globl _libva_drm_so_tramp_table
.p2align 3
_libva_drm_so_tramp_table:
.skip 16
.text

// Resolver entry point, defined outside this file. It is called below with
// the symbol index in %rdi.
// NOTE(review): it is expected to store the resolved address into
// _libva_drm_so_tramp_table; the trampoline fast path relies on that.
.globl _libva_drm_so_tramp_resolve
.hidden _libva_drm_so_tramp_resolve

// ---------------------------------------------------------------------
// _libva_drm_so_save_regs_and_resolve
//
// Slow-path helper called by the trampolines in this file.
//
// In:    symbol index on the stack, directly above the return address
//        (the trampoline pushes it right before the call).
// Does:  saves every general-purpose register (except %rsp) and, when the
//        matching compiler macro is defined, vector registers 0-7; calls
//        _libva_drm_so_tramp_resolve with the index in %rdi; restores
//        everything in reverse order.
// Out:   all saved registers, INCLUDING %rax, hold their entry values.
//        Callers must not expect a result in %rax. RFLAGS is clobbered.
//
// Lines tagged with "// 16" are points where %rsp is 16-byte aligned,
// provided the trampoline itself was entered with the usual SysV
// alignment (rsp % 16 == 8).
// ---------------------------------------------------------------------
.globl _libva_drm_so_save_regs_and_resolve
.hidden _libva_drm_so_save_regs_and_resolve
.type _libva_drm_so_save_regs_and_resolve, %function
_libva_drm_so_save_regs_and_resolve:
  .cfi_startproc

#define PUSH_REG(reg) pushq %reg ; .cfi_adjust_cfa_offset 8; .cfi_rel_offset reg, 0
#define POP_REG(reg) popq %reg ; .cfi_adjust_cfa_offset -8; .cfi_restore reg

#define DEC_STACK(d) subq $d, %rsp; .cfi_adjust_cfa_offset d
#define INC_STACK(d) addq $d, %rsp; .cfi_adjust_cfa_offset -d

#define PUSH_MMX_REG(reg) DEC_STACK(8); movq %reg, (%rsp); .cfi_rel_offset reg, 0
#define POP_MMX_REG(reg) movq (%rsp), %reg; .cfi_restore reg; INC_STACK(8)

#define PUSH_XMM_REG(reg) DEC_STACK(16); movdqa %reg, (%rsp); .cfi_rel_offset reg, 0
#define POP_XMM_REG(reg) movdqa (%rsp), %reg; .cfi_restore reg; INC_STACK(16)

// TODO: cfi_offset/cfi_restore
#define PUSH_YMM_REG(reg) DEC_STACK(32); vmovdqu %reg, (%rsp)
#define POP_YMM_REG(reg) vmovdqu (%rsp), %reg; INC_STACK(32)

// TODO: cfi_offset/cfi_restore
#define PUSH_ZMM_REG(reg) DEC_STACK(64); vmovdqu32 %reg, (%rsp)
#define POP_ZMM_REG(reg) vmovdqu32 (%rsp), %reg; INC_STACK(64)

  // Slow path which calls dlsym, taken only on first call.
  // All registers are stored to handle arbitrary calling conventions
  // (except x87 FPU registers which do not have to be preserved).
  // For Dwarf directives, read https://www.imperialviolet.org/2017/01/18/cfi.html.

  .cfi_def_cfa_offset 8  // Return address

  PUSH_REG(rdi)  // 16
  // Stack now: 0(%rsp) = saved rdi, 8(%rsp) = return address,
  // 0x10(%rsp) = symbol index pushed by the trampoline.
  mov 0x10(%rsp), %rdi
  // %rax must be preserved too: it may carry caller state (for example
  // SysV variadic calls pass the vector register count in %al) and the
  // resolver call is free to clobber it.
  PUSH_REG(rax)
  PUSH_REG(rbx)  // 16
  PUSH_REG(rcx)
  PUSH_REG(rdx)  // 16
  PUSH_REG(rbp)
  PUSH_REG(rsi)  // 16
  PUSH_REG(r8)
  PUSH_REG(r9)  // 16
  PUSH_REG(r10)
  PUSH_REG(r11)  // 16
  PUSH_REG(r12)
  PUSH_REG(r13)  // 16
  PUSH_REG(r14)
  PUSH_REG(r15)  // 16

  // Maybe use cpuid instead of macro to detect current vector size...
#ifdef __AVX512F__
  PUSH_ZMM_REG(zmm0)
  PUSH_ZMM_REG(zmm1)
  PUSH_ZMM_REG(zmm2)
  PUSH_ZMM_REG(zmm3)
  PUSH_ZMM_REG(zmm4)
  PUSH_ZMM_REG(zmm5)
  PUSH_ZMM_REG(zmm6)
  PUSH_ZMM_REG(zmm7)
#elif defined __AVX__
  PUSH_YMM_REG(ymm0)
  PUSH_YMM_REG(ymm1)
  PUSH_YMM_REG(ymm2)
  PUSH_YMM_REG(ymm3)
  PUSH_YMM_REG(ymm4)
  PUSH_YMM_REG(ymm5)
  PUSH_YMM_REG(ymm6)
  PUSH_YMM_REG(ymm7)
#elif defined __SSE__
  PUSH_XMM_REG(xmm0)
  PUSH_XMM_REG(xmm1)
  PUSH_XMM_REG(xmm2)
  PUSH_XMM_REG(xmm3)
  PUSH_XMM_REG(xmm4)
  PUSH_XMM_REG(xmm5)
  PUSH_XMM_REG(xmm6)
  PUSH_XMM_REG(xmm7)
#endif

  // MMX registers are not used to pass arguments so we do not save them

  // Fifteen 8-byte pushes on top of the return address, plus vector saves
  // that are multiples of 16 bytes, leave %rsp 16-byte aligned here when
  // the trampoline was entered with standard SysV alignment.
  // NOTE(review): for callers using a different stack alignment the
  // callee is presumably expected to re-align on its own.
  call _libva_drm_so_tramp_resolve

#ifdef __AVX512F__
  POP_ZMM_REG(zmm7)
  POP_ZMM_REG(zmm6)
  POP_ZMM_REG(zmm5)
  POP_ZMM_REG(zmm4)
  POP_ZMM_REG(zmm3)
  POP_ZMM_REG(zmm2)
  POP_ZMM_REG(zmm1)
  POP_ZMM_REG(zmm0)  // 16
#elif defined __AVX__
  POP_YMM_REG(ymm7)
  POP_YMM_REG(ymm6)
  POP_YMM_REG(ymm5)
  POP_YMM_REG(ymm4)
  POP_YMM_REG(ymm3)
  POP_YMM_REG(ymm2)
  POP_YMM_REG(ymm1)
  POP_YMM_REG(ymm0)  // 16
#elif defined __SSE__
  POP_XMM_REG(xmm7)
  POP_XMM_REG(xmm6)
  POP_XMM_REG(xmm5)
  POP_XMM_REG(xmm4)
  POP_XMM_REG(xmm3)
  POP_XMM_REG(xmm2)
  POP_XMM_REG(xmm1)
  POP_XMM_REG(xmm0)  // 16
#endif

  // Restore general-purpose registers in exact reverse push order.
  POP_REG(r15)
  POP_REG(r14)  // 16
  POP_REG(r13)
  POP_REG(r12)  // 16
  POP_REG(r11)
  POP_REG(r10)  // 16
  POP_REG(r9)
  POP_REG(r8)  // 16
  POP_REG(rsi)
  POP_REG(rbp)  // 16
  POP_REG(rdx)
  POP_REG(rcx)  // 16
  POP_REG(rbx)
  POP_REG(rax)  // 16
  POP_REG(rdi)

  ret

  .cfi_endproc
/*
* Copyright 2018-2025 Yury Gribov
*
* The MIT License (MIT)
*
* Use of this source code is governed by MIT license that can be
* found in the LICENSE.txt file.
*/

// ---------------------------------------------------------------------
// vaGetDisplayDRM - lazy-binding trampoline.
//
// Fast path: if slot 0 of _libva_drm_so_tramp_table is non-zero, tail-jump
// through it with all argument registers and the stack untouched.
// Slow path: push this symbol index (0), call the register-preserving
// resolver helper, drop the index and retry the jump through the table.
//
// The symbol is hidden unless IMPLIB_EXPORT_SHIMS is defined.
// ---------------------------------------------------------------------
.globl vaGetDisplayDRM
.p2align 4
.type vaGetDisplayDRM, %function
#ifndef IMPLIB_EXPORT_SHIMS
.hidden vaGetDisplayDRM
#endif
vaGetDisplayDRM:
  .cfi_startproc
  .cfi_def_cfa_offset 8  // Return address
  // Intel opt. manual says to
  // "make the fall-through code following a conditional branch be the likely target for a branch with a forward target"
  // to hint static predictor.
  cmpq $0, _libva_drm_so_tramp_table+0(%rip)
  je 2f
1:
  jmp *_libva_drm_so_tramp_table+0(%rip)
2:
  pushq $0  // Index of this symbol, read by the helper from the stack
  .cfi_adjust_cfa_offset 8
  call _libva_drm_so_save_regs_and_resolve
  addq $8, %rsp  // Drop the index
  .cfi_adjust_cfa_offset -8
  // The helper restores every register, %rax included, so the target
  // address must be re-read from the table rather than taken from %rax.
  jmp 1b
  .cfi_endproc