#include "config.h"
#include "parasail/cpuid.h"
#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__PPC64__)
/*
 * x86 SIMD probes for ARM / PowerPC builds: this branch is only compiled for
 * those targets, so every x86 instruction-set query answers 0.
 */

/* AVX-512 VBMI: always 0 in this branch. */
int parasail_can_use_avx512vbmi()
{
    return 0;
}

/* AVX-512 BW: always 0 in this branch. */
int parasail_can_use_avx512bw()
{
    return 0;
}

/* AVX-512 F: always 0 in this branch. */
int parasail_can_use_avx512f()
{
    return 0;
}

/* AVX2: always 0 in this branch. */
int parasail_can_use_avx2()
{
    return 0;
}

/* SSE4.1: always 0 in this branch. */
int parasail_can_use_sse41()
{
    return 0;
}

/* SSE2: always 0 in this branch. */
int parasail_can_use_sse2()
{
    return 0;
}
/*
 * Report AltiVec availability.
 *
 * Returns 1 only when compiling for a PowerPC target and HAVE_ALTIVEC
 * evaluates to non-zero; returns 0 in every other case. The answer is fixed at
 * compile time.
 */
int parasail_can_use_altivec()
{
    int altivec_available = 0;
#if defined(__powerpc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__PPC64__)
#if HAVE_ALTIVEC
    altivec_available = 1;
#endif
#endif
    return altivec_available;
}
/*
 * Report NEON availability.
 *
 * Returns 1 only when compiling for an ARM / AArch64 target and HAVE_NEON
 * evaluates to non-zero; returns 0 in every other case. The answer is fixed at
 * compile time.
 */
int parasail_can_use_neon()
{
    int neon_available = 0;
#if defined(__arm__) || defined(__aarch64__)
#if HAVE_NEON
    neon_available = 1;
#endif
#endif
    return neon_available;
}
#else
#include <stdint.h>
#if defined(_MSC_VER)
# include <intrin.h>
#endif
/*
 * Execute CPUID for the given leaf (eax) and sub-leaf (ecx) and store the
 * resulting EAX, EBX, ECX and EDX values in abcd[0], abcd[1], abcd[2] and
 * abcd[3] respectively. abcd must have room for four uint32_t values.
 */
static void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd)
{
#if defined(_MSC_VER)
/* MSVC: use the compiler intrinsic, which fills the four-element array. */
__cpuidex(abcd, eax, ecx);
#else
uint32_t ebx=0, edx=0;
/*
 * The asm statement below is split across the preprocessor branches: each
 * branch supplies the instruction template and the EBX output operand, and the
 * shared line after the conditional supplies the remaining operands.
 */
# if defined( __i386__ ) && defined ( __PIC__ )
/*
 * 32-bit PIC build: EBX is copied into EDI before CPUID and the two are
 * swapped back afterwards, so EBX leaves the asm unchanged and the CPUID EBX
 * result is read from EDI (the "=D" output). EBX is conventionally reserved
 * for the GOT pointer in i386 PIC code, hence the detour.
 */
__asm__ ( "movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
# else
/* Otherwise EBX can be used directly as an in/out operand. */
__asm__ ( "cpuid" : "+b" (ebx),
# endif
"+a" (eax), "+c" (ecx), "=d" (edx) );
/* Publish the registers in EAX, EBX, ECX, EDX order. */
abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
#endif
}
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)
#include <immintrin.h>
static int check_4th_gen_intel_core_features()
{
#if 0#else
const int the_4th_gen_features = _FEATURE_AVX2;
#endif
return _may_i_use_cpu_feature( the_4th_gen_features );
}
/*
 * Intel-compiler variant of the "avx512f" probe.
 *
 * Returns the result of _may_i_use_cpu_feature() for the combined
 * AVX-512 F, ER, PF and CD feature mask.
 *
 * The mask is held in a 64-bit type because _may_i_use_cpu_feature takes a
 * 64-bit mask, while `unsigned long` is only 32 bits on LLP64 targets (e.g.
 * Windows) and would drop any feature bit above bit 31.
 * NOTE(review): the exact bit positions of the _FEATURE_* constants come from
 * the compiler's immintrin.h and are not visible here.
 * NOTE(review): ER and PF are, to my knowledge, Xeon Phi-only extensions;
 * confirm that requiring them for "avx512f" is intended.
 */
static int has_intel_avx512f_features(void)
{
    const unsigned long long avx512f_features =
        (_FEATURE_AVX512F | _FEATURE_AVX512ER | _FEATURE_AVX512PF | _FEATURE_AVX512CD);

    return _may_i_use_cpu_feature(avx512f_features);
}
/*
 * Intel-compiler variant of the "avx512bw" probe.
 *
 * Returns the result of _may_i_use_cpu_feature() for the combined
 * AVX-512 VL, BW and DQ feature mask.
 *
 * The mask is held in a 64-bit type because _may_i_use_cpu_feature takes a
 * 64-bit mask, while `unsigned long` is only 32 bits on LLP64 targets (e.g.
 * Windows) and would drop any feature bit above bit 31.
 * NOTE(review): the exact bit positions of the _FEATURE_* constants come from
 * the compiler's immintrin.h and are not visible here.
 */
static int has_intel_avx512bw_features(void)
{
    /* Previously named avx512f_features, which did not match its contents. */
    const unsigned long long avx512bw_features =
        (_FEATURE_AVX512VL | _FEATURE_AVX512BW | _FEATURE_AVX512DQ);

    return _may_i_use_cpu_feature(avx512bw_features);
}
/*
 * Intel-compiler variant of the AVX-512 VBMI probe.
 *
 * Returns the result of _may_i_use_cpu_feature(_FEATURE_AVX512VBMI).
 *
 * The mask is held in a 64-bit type because _may_i_use_cpu_feature takes a
 * 64-bit mask, while `unsigned long` is only 32 bits on LLP64 targets (e.g.
 * Windows) and would drop any feature bit above bit 31.
 * NOTE(review): the exact bit position of _FEATURE_AVX512VBMI comes from the
 * compiler's immintrin.h and is not visible here.
 */
static int has_intel_avx512vbmi_features(void)
{
    const unsigned long long avx512vbmi_features = (_FEATURE_AVX512VBMI);

    return _may_i_use_cpu_feature(avx512vbmi_features);
}
#else
/*
 * Check whether XCR0 has both bit 1 and bit 2 set (the XMM and YMM state
 * bits).
 *
 * Returns 1 when both bits are set, 0 otherwise. When HAVE_XGETBV is not
 * enabled XCR0 cannot be read and the function returns 0.
 */
static int check_xcr0_ymm()
{
    int ymm_state_enabled = 0;
#if HAVE_XGETBV
    const uint32_t xmm_ymm_bits = (1U << 2) | (1U << 1);
    uint32_t xcr0_low;
#if defined(_MSC_VER)
    xcr0_low = (uint32_t)_xgetbv(0);
#else
    /* XGETBV with ECX = 0 reads XCR0; only the low half (EAX) is kept. */
    __asm__ ("xgetbv" : "=a" (xcr0_low) : "c" (0) : "%edx" );
#endif
    ymm_state_enabled = ((xcr0_low & xmm_ymm_bits) == xmm_ymm_bits);
#endif
    return ymm_state_enabled;
}
/*
 * Check whether XCR0 has bits 1, 2, 5, 6 and 7 all set (the XMM, YMM and the
 * three AVX-512 state bits).
 *
 * Returns 1 when every one of those bits is set, 0 otherwise. When
 * HAVE_XGETBV is not enabled XCR0 cannot be read and the function returns 0.
 */
static int check_xcr0_zmm()
{
    int zmm_state_enabled = 0;
#if HAVE_XGETBV
    const uint32_t required_bits = (1U << 1) | (1U << 2) | (7U << 5);
    uint32_t xcr0_low;
#if defined(_MSC_VER)
    xcr0_low = (uint32_t)_xgetbv(0);
#else
    /* XGETBV with ECX = 0 reads XCR0; only the low half (EAX) is kept. */
    __asm__ ("xgetbv" : "=a" (xcr0_low) : "c" (0) : "%edx" );
#endif
    zmm_state_enabled = ((xcr0_low & required_bits) == required_bits);
#endif
    return zmm_state_enabled;
}
/*
 * Detect AVX2 through CPUID.
 *
 * Returns 1 when CPUID leaf 7, sub-leaf 0 reports the AVX2 flag (EBX bit 5),
 * and 0 otherwise.
 *
 * The previous body contained fused "#if 0#endif" / "#if 0#else" directives,
 * which are not valid preprocessor conditionals and left the conditional
 * nesting unbalanced; only the AVX2-bit test was live, so it is kept here
 * without any conditional.
 *
 * NOTE(review): unlike the AVX-512 probes in this file, this does not check
 * OSXSAVE / XCR0, so it reports the CPU capability only. Confirm whether the
 * result should also be gated on check_xcr0_ymm().
 */
static int check_4th_gen_intel_core_features(void)
{
    const uint32_t avx2_mask = (1U << 5);
    uint32_t abcd[4];

    /*
     * Leaf 0 returns the highest supported basic leaf in EAX. Querying a leaf
     * above that maximum does not reliably return zeros, so leaf 7 is only
     * trusted when the CPU actually implements it.
     */
    run_cpuid( 0, 0, abcd );
    if ( abcd[0] < 7 )
        return 0;

    run_cpuid( 7, 0, abcd );
    if ( (abcd[1] & avx2_mask) != avx2_mask )
        return 0;

    return 1;
}
/*
 * Probe CPUID / XCR0 for the feature set this file groups under "avx512f".
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - CPUID leaf 1 reports OSXSAVE (ECX bit 27),
 *   - check_xcr0_zmm() succeeds,
 *   - CPUID leaf 7, sub-leaf 0 has EBX bits 16, 26, 27 and 28 set
 *     (AVX-512 F, PF, ER and CD).
 *
 * NOTE(review): ER and PF are, to my knowledge, Xeon Phi-only extensions;
 * confirm that requiring them for "avx512f" is intended.
 */
static int has_intel_avx512f_features()
{
    const uint32_t osxsave_bit = (1U << 27);
    const uint32_t required_ebx_bits =
        (1U << 16) | (1U << 26) | (1U << 27) | (1U << 28);
    uint32_t regs[4];
    int supported = 0;

    run_cpuid(1, 0, regs);
    /* Short-circuit keeps the original order: XCR0 is read only if OSXSAVE is set. */
    if ((regs[2] & osxsave_bit) != 0 && check_xcr0_zmm()) {
        run_cpuid(7, 0, regs);
        supported = ((regs[1] & required_ebx_bits) == required_ebx_bits);
    }
    return supported;
}
/*
 * Probe CPUID / XCR0 for the feature set this file groups under "avx512bw".
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - CPUID leaf 1 reports OSXSAVE (ECX bit 27),
 *   - check_xcr0_zmm() succeeds,
 *   - CPUID leaf 7, sub-leaf 0 has EBX bits 17, 30 and 31 set
 *     (AVX-512 DQ, BW and VL).
 */
static int has_intel_avx512bw_features()
{
    const uint32_t osxsave_bit = (1U << 27);
    const uint32_t required_ebx_bits = (1U << 17) | (1U << 30) | (1U << 31);
    uint32_t regs[4];
    int supported = 0;

    run_cpuid(1, 0, regs);
    /* Short-circuit keeps the original order: XCR0 is read only if OSXSAVE is set. */
    if ((regs[2] & osxsave_bit) != 0 && check_xcr0_zmm()) {
        run_cpuid(7, 0, regs);
        supported = ((regs[1] & required_ebx_bits) == required_ebx_bits);
    }
    return supported;
}
/*
 * Probe CPUID / XCR0 for AVX-512 VBMI.
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - CPUID leaf 1 reports OSXSAVE (ECX bit 27),
 *   - check_xcr0_zmm() succeeds,
 *   - CPUID leaf 7, sub-leaf 0 has ECX bit 1 set (AVX-512 VBMI).
 */
static int has_intel_avx512vbmi_features()
{
    const uint32_t osxsave_bit = (1U << 27);
    const uint32_t vbmi_bit = (1U << 1);
    uint32_t regs[4];
    int supported = 0;

    run_cpuid(1, 0, regs);
    /* Short-circuit keeps the original order: XCR0 is read only if OSXSAVE is set. */
    if ((regs[2] & osxsave_bit) != 0 && check_xcr0_zmm()) {
        run_cpuid(7, 0, regs);
        /* Note: this flag lives in ECX (regs[2]), not EBX. */
        supported = ((regs[2] & vbmi_bit) != 0);
    }
    return supported;
}
#endif
/*
 * Detect SSE4.1 through CPUID.
 *
 * Returns 1 when CPUID leaf 1 is available and reports ECX bit 19 (SSE4.1),
 * 0 otherwise.
 */
static int check_sse41()
{
    uint32_t regs[4];

    /* Leaf 0: EAX holds the highest supported basic leaf. */
    run_cpuid(0, 0, regs);
    if (regs[0] == 0) {
        return 0;
    }

    run_cpuid(0x00000001, 0, regs);
    return (int)((regs[2] >> 19) & 1U);
}
/*
 * Detect SSE2 through CPUID.
 *
 * Returns 1 when CPUID leaf 1 is available and reports EDX bit 26 (SSE2),
 * 0 otherwise.
 */
static int check_sse2()
{
    uint32_t regs[4];

    /* Leaf 0: EAX holds the highest supported basic leaf. */
    run_cpuid(0, 0, regs);
    if (regs[0] == 0) {
        return 0;
    }

    run_cpuid(0x00000001, 0, regs);
    return (int)((regs[3] >> 26) & 1U);
}
/*
 * Public query: result of has_intel_avx512vbmi_features().
 *
 * The probe runs on the first call; its result is stored in a function-local
 * static and returned directly on later calls.
 */
int parasail_can_use_avx512vbmi()
{
    /* Negative means "not probed yet". */
    static int cached_result = -1;

    if (cached_result >= 0) {
        return cached_result;
    }
    cached_result = has_intel_avx512vbmi_features();
    return cached_result;
}
/*
 * Public query: result of has_intel_avx512bw_features().
 *
 * The probe runs on the first call; its result is stored in a function-local
 * static and returned directly on later calls.
 */
int parasail_can_use_avx512bw()
{
    /* Negative means "not probed yet". */
    static int cached_result = -1;

    if (cached_result >= 0) {
        return cached_result;
    }
    cached_result = has_intel_avx512bw_features();
    return cached_result;
}
/*
 * Public query: result of has_intel_avx512f_features().
 *
 * The probe runs on the first call; its result is stored in a function-local
 * static and returned directly on later calls.
 */
int parasail_can_use_avx512f()
{
    /* Negative means "not probed yet". */
    static int cached_result = -1;

    if (cached_result >= 0) {
        return cached_result;
    }
    cached_result = has_intel_avx512f_features();
    return cached_result;
}
/*
 * Public query: result of check_4th_gen_intel_core_features(), which is this
 * file's AVX2 probe.
 *
 * The probe runs on the first call; its result is stored in a function-local
 * static and returned directly on later calls.
 */
int parasail_can_use_avx2()
{
    /* Negative means "not probed yet". */
    static int cached_result = -1;

    if (cached_result >= 0) {
        return cached_result;
    }
    cached_result = check_4th_gen_intel_core_features();
    return cached_result;
}
/*
 * Public query: result of check_sse41().
 *
 * The probe runs on the first call; its result is stored in a function-local
 * static and returned directly on later calls.
 */
int parasail_can_use_sse41()
{
    /* Negative means "not probed yet". */
    static int cached_result = -1;

    if (cached_result >= 0) {
        return cached_result;
    }
    cached_result = check_sse41();
    return cached_result;
}
/*
 * Public query: result of check_sse2().
 *
 * The probe runs on the first call; its result is stored in a function-local
 * static and returned directly on later calls.
 */
int parasail_can_use_sse2()
{
    /* Negative means "not probed yet". */
    static int cached_result = -1;

    if (cached_result >= 0) {
        return cached_result;
    }
    cached_result = check_sse2();
    return cached_result;
}
/*
 * AltiVec query for the non-ARM, non-PowerPC branch of this file:
 * always returns 0.
 */
int parasail_can_use_altivec()
{
    const int altivec_available = 0;

    return altivec_available;
}
/*
 * NEON query for the non-ARM, non-PowerPC branch of this file:
 * always returns 0.
 */
int parasail_can_use_neon()
{
    const int neon_available = 0;

    return neon_available;
}
#endif