#include <openssl/cpu.h>
#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
#include <inttypes.h>
#if defined(OPENSSL_WINDOWS)
#pragma warning(push, 3)
#include <immintrin.h>
#include <intrin.h>
#pragma warning(pop)
#endif
#include "internal.h"
// OPENSSL_cpuid executes the CPUID instruction for |leaf| and stores the
// resulting EAX, EBX, ECX and EDX register values in |*out_eax|, |*out_ebx|,
// |*out_ecx| and |*out_edx| respectively. The inline-assembly paths zero ECX
// before issuing the instruction.
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
                          uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
#if defined(OPENSSL_WINDOWS)
  // Use the compiler intrinsic; it fills the array in EAX, EBX, ECX, EDX
  // order.
  int regs[4];
  __cpuid(regs, (int)leaf);
  *out_eax = (uint32_t)regs[0];
  *out_ebx = (uint32_t)regs[1];
  *out_ecx = (uint32_t)regs[2];
  *out_edx = (uint32_t)regs[3];
#else
  uint32_t reg_a, reg_b, reg_c, reg_d;
#if defined(__pic__) && defined(OPENSSL_32_BIT)
  // 32-bit position-independent build: EBX must hold the same value when the
  // asm statement ends as when it began. Stash EBX in EDI, run CPUID, then
  // swap the two so that EBX is restored and the CPUID result for EBX is
  // handed back through EDI.
  __asm__ volatile (
    "xor %%ecx, %%ecx\n"
    "mov %%ebx, %%edi\n"
    "cpuid\n"
    "xchg %%edi, %%ebx\n"
    : "=a"(reg_a), "=D"(reg_b), "=c"(reg_c), "=d"(reg_d)
    : "a"(leaf)
  );
#else
  __asm__ volatile (
    "xor %%ecx, %%ecx\n"
    "cpuid\n"
    : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
    : "a"(leaf)
  );
#endif
  *out_eax = reg_a;
  *out_ebx = reg_b;
  *out_ecx = reg_c;
  *out_edx = reg_d;
#endif
}
// OPENSSL_xgetbv returns the 64-bit value of extended control register |xcr|
// as read by the XGETBV instruction. The instruction delivers the value split
// across EDX (upper 32 bits) and EAX (lower 32 bits).
static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
#if defined(OPENSSL_WINDOWS)
  return (uint64_t)_xgetbv(xcr);
#else
  uint32_t low_half, high_half;
  __asm__ volatile ("xgetbv" : "=a"(low_half), "=d"(high_half) : "c"(xcr));
  uint64_t value = high_half;
  value <<= 32;
  value |= low_half;
  return value;
#endif
}
// OPENSSL_cpuid_setup probes the processor with CPUID (and XGETBV when the OS
// has enabled it) and publishes the result in |OPENSSL_ia32cap_P|:
//
//   [0]  CPUID leaf 1 EDX, with bit 20 forced to zero, bit 30 overwritten to
//        mean "vendor is Intel", and bit 28 (hyper-threading) cleared when
//        the topology information says it cannot apply.
//   [1]  CPUID leaf 1 ECX, with bit 11 overwritten to mean "AMD XOP".
//   [2]  CPUID leaf 7 (sub-leaf 0) EBX, or zero if leaf 7 is unavailable.
//   [3]  Always zero.
//
// Vector-extension bits are cleared whenever XCR0 shows that the operating
// system has not enabled the register state those extensions need.
void OPENSSL_cpuid_setup(void) {
  // Leaf 0: EAX is the highest supported standard leaf; EBX, EDX, ECX hold
  // the vendor string.
  uint32_t eax, ebx, ecx, edx;
  OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0);
  uint32_t num_ids = eax;

  int is_intel = ebx == 0x756e6547 /* "Genu" */ &&
                 edx == 0x49656e69 /* "ineI" */ &&
                 ecx == 0x6c65746e /* "ntel" */;
  int is_amd = ebx == 0x68747541 /* "Auth" */ &&
               edx == 0x69746e65 /* "enti" */ &&
               ecx == 0x444d4163 /* "cAMD" */;

  // XOP is reported in the extended leaf 0x80000001 (ECX bit 11) and is only
  // looked for on AMD parts.
  int has_amd_xop = 0;
  if (is_amd) {
    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000000);
    uint32_t num_extended_ids = eax;
    if (num_extended_ids >= 0x80000001) {
      OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0x80000001);
      if (ecx & (1u << 11)) {
        has_amd_xop = 1;
      }
    }
  }

  // Leaf 7 EBX carries the structured extended feature flags.
  uint32_t extended_features = 0;
  if (num_ids >= 7) {
    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7);
    extended_features = ebx;
  }

  // Determine how many logical processors share a cache. AMD is treated as
  // always one; otherwise leaf 4 EAX bits 25:14 hold the count minus one.
  uint32_t cores_per_cache = 0;
  if (is_amd) {
    cores_per_cache = 1;
  } else if (num_ids >= 4) {
    OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
    cores_per_cache = 1 + ((eax >> 14) & 0xfff);
  }

  // Leaf 1: the basic feature flags in EDX and ECX.
  OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);

  // Drop the hyper-threading bit when only one logical processor is reported
  // (EBX bits 23:16) or when caches are not shared between processors.
  if (edx & (1u << 28)) {
    uint32_t num_logical_cores = (ebx >> 16) & 0xff;
    if (cores_per_cache == 1 || num_logical_cores <= 1) {
      edx &= ~(1u << 28);
    }
  }

  // Bit 20 is always published as zero.
  edx &= ~(1u << 20);

  // Bit 30 is overwritten with the "Intel vendor" flag.
  if (is_intel) {
    edx |= (1u << 30);
  } else {
    edx &= ~(1u << 30);
  }

  // ECX bit 11 is overwritten with the AMD XOP flag.
  if (has_amd_xop) {
    ecx |= (1u << 11);
  } else {
    ecx &= ~(1u << 11);
  }

  // XGETBV may only be executed when OSXSAVE (ECX bit 27) is set. Otherwise
  // behave as though no extended state is enabled.
  uint64_t xcr0 = 0;
  if (ecx & (1u << 27)) {
    xcr0 = OPENSSL_xgetbv(0);
  }

  if ((xcr0 & 6) != 6) {
    // XMM and YMM state are not both enabled, so nothing that relies on YMM
    // registers may be advertised.
    ecx &= ~(1u << 28);                // AVX
    ecx &= ~(1u << 12);                // FMA
    ecx &= ~(1u << 11);                // AMD XOP
    extended_features &= ~(1u << 5);   // AVX2
  }

  if ((xcr0 & 0xe6) != 0xe6) {
    // AVX-512 additionally needs opmask, ZMM_Hi256 and Hi16_ZMM state (XCR0
    // bits 5-7). Without all of them no AVX-512 feature may be used, not even
    // on XMM or YMM operands, so do not advertise any AVX-512 bit.
    extended_features &= ~(1u << 16);  // AVX512F
    extended_features &= ~(1u << 17);  // AVX512DQ
    extended_features &= ~(1u << 21);  // AVX512IFMA
    extended_features &= ~(1u << 26);  // AVX512PF
    extended_features &= ~(1u << 27);  // AVX512ER
    extended_features &= ~(1u << 28);  // AVX512CD
    extended_features &= ~(1u << 30);  // AVX512BW
    extended_features &= ~(1u << 31);  // AVX512VL
  }

  OPENSSL_ia32cap_P[0] = edx;
  OPENSSL_ia32cap_P[1] = ecx;
  OPENSSL_ia32cap_P[2] = extended_features;
  OPENSSL_ia32cap_P[3] = 0;
}
#endif